JaishnaCodz committed
Commit e9d2c6d · verified · 1 parent: 911c134

Update app.py

Files changed (1)
  1. app.py +55 -83
app.py CHANGED
@@ -5,111 +5,83 @@ import nltk
 from nltk.tokenize import sent_tokenize
 import re

-# Download punkt tokenizer
-nltk.download("punkt")
+# Download punkt tokenizer for sentence splitting
+nltk.download('punkt')

-# Load Hugging Face pipelines
-grammar_corrector = pipeline("text2text-generation", model="pszemraj/flan-t5-base-grammar-synthesis")
-toxicity_detector = pipeline("text-classification", model="unitary/toxic-bert")
+# Load grammar correction and toxicity detection models
+grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
+toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")

-# Utility: extract blog text from URL
 def extract_text(input_type, text_input, url_input):
-    if input_type == "Text":
-        return text_input
-    elif input_type == "URL":
+    if input_type == "URL" and url_input:
         try:
             article = Article(url_input)
             article.download()
             article.parse()
             return article.text
         except Exception as e:
-            return f"Error extracting article: {str(e)}"
-    return ""
+            return f"Error extracting from URL: {str(e)}"
+    return text_input

-# Highlight grammar & sensitive content
-def highlight_text(text, grammar_sentences, toxic_sentences):
-    for s in grammar_sentences:
-        text = text.replace(s, f"<span style='background-color: yellow'>{s}</span>")
-    for s in toxic_sentences:
-        text = text.replace(s, f"<span style='background-color: red'>{s}</span>")
-    return text
-
-# Main blog review function
-def review_blog(input_type, text_input, url_input):
-    text = extract_text(input_type, text_input, url_input)
-    if not text or text.startswith("Error"):
-        return text, "", []
-
+def check_grammar(text):
+    try:
+        corrected = grammar_corrector(text, max_length=512)[0]['generated_text']
+        return corrected
+    except Exception as e:
+        return f"Error in grammar correction: {str(e)}"
+
+def detect_sensitive_content(text):
     sentences = sent_tokenize(text)
-    grammar_issues = []
-    toxic_issues = []
-    suggestions = []
-
-    for sent in sentences:
-        # Check grammar by comparing original and corrected
-        corrected = grammar_corrector(sent, max_length=128, do_sample=False)[0]['generated_text']
-        if corrected.strip() != sent.strip():
-            grammar_issues.append(sent)
-            suggestions.append(f"Grammar: Replace '{sent}' → '{corrected}'")
-
-        # Check for toxicity
-        result = toxicity_detector(sent)
+    sensitive = []
+    for i, sentence in enumerate(sentences):
+        result = toxicity_classifier(sentence)
         if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-            toxic_issues.append(sent)
-            suggestions.append(f"Toxicity: Rephrase '{sent}' (score: {result[0]['score']:.2f})")
-
-    highlighted = highlight_text(text, grammar_issues, toxic_issues)
-    sug_text = "\n".join(f"{i+1}. {s}" for i, s in enumerate(suggestions))
-    return highlighted, sug_text, suggestions
-
-# Apply approved suggestions
-def apply_changes(original_text, suggestions, indices):
-    try:
-        indices = [int(i.strip()) - 1 for i in indices.split(",") if i.strip().isdigit()]
-        sentences = sent_tokenize(original_text)
-        for i in indices:
-            if i < len(suggestions):
-                match = re.search(r"'(.*?)'\s*→\s*'(.*?)'", suggestions[i])
-                if match:
-                    old = match.group(1)
-                    new = match.group(2)
-                    original_text = original_text.replace(old, new)
-        return original_text
-    except Exception as e:
-        return f"Error applying changes: {str(e)}"
+            sensitive.append({
+                "sentence": sentence,
+                "score": result[0]['score'],
+                "index": i
+            })
+    return sensitive
+
+def highlight_sensitive(text, sensitive_issues):
+    highlighted = text
+    for issue in sensitive_issues:
+        sentence = issue['sentence']
+        highlighted = highlighted.replace(sentence, f"<span style='background-color:red'>{sentence}</span>")
+    return highlighted
+
+def review_blog(input_type, text_input, url_input):
+    text = extract_text(input_type, text_input, url_input)
+    if text.startswith("Error"):
+        return text, text
+
+    corrected_text = check_grammar(text)
+    sensitive_issues = detect_sensitive_content(corrected_text)
+    highlighted = highlight_sensitive(corrected_text, sensitive_issues)
+
+    return highlighted, corrected_text

 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## Blog Content Reviewer (LLM-powered)")
-    gr.Markdown("Paste blog content or a blog URL. AI will detect grammar issues & sensitive content.")
+    gr.Markdown("# Blog Grammar & Toxicity Reviewer")

-    input_type = gr.Radio(["Text", "URL"], value="Text", label="Input Type")
+    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
     text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
     url_input = gr.Textbox(label="Blog URL", visible=False)

-    input_type.change(lambda t: {text_input: gr.update(visible=t=="Text"),
-                                 url_input: gr.update(visible=t=="URL")},
-                      input_type, [text_input, url_input])
-
-    review_button = gr.Button("Review")
-    highlighted_output = gr.HTML()
-    suggestions_output = gr.Textbox(label="Suggestions", lines=10)
-    approve_input = gr.Textbox(label="Approve Suggestions (e.g., 1,2)")
-    apply_button = gr.Button("Apply Changes")
-    final_text = gr.Textbox(label="Final Output", lines=10)
-
-    suggestions_state = gr.State()
-
-    review_button.click(
-        fn=review_blog,
-        inputs=[input_type, text_input, url_input],
-        outputs=[highlighted_output, suggestions_output, suggestions_state]
-    )
-
-    apply_button.click(
-        fn=apply_changes,
-        inputs=[text_input, suggestions_state, approve_input],
-        outputs=[final_text]
-    )
-
-demo.launch()
+    def toggle_inputs(choice):
+        return {
+            text_input: gr.update(visible=choice == "Text"),
+            url_input: gr.update(visible=choice == "URL")
+        }
+
+    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[text_input, url_input])
+
+    review_button = gr.Button("Review Blog")
+    highlighted_output = gr.HTML(label="Highlighted Output (Red = Sensitive)")
+    corrected_output = gr.Textbox(label="Corrected Text", lines=10)
+
+    review_button.click(fn=review_blog, inputs=[input_type, text_input, url_input], outputs=[highlighted_output, corrected_output])
+
+if __name__ == "__main__":
+    demo.launch()
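Since the refactor routes everything through review_blog() and moves demo.launch() behind a __main__ guard, the pipeline can now be exercised without starting the Gradio server. A minimal sketch under stated assumptions: app.py is importable from the working directory, the two Hugging Face models have finished downloading, and the sample text is invented for illustration.

# Headless sketch: call the refactored review pipeline directly.
# Importing app builds the gr.Blocks UI but does not launch it,
# because demo.launch() now sits behind the __main__ guard.
from app import review_blog

sample = "This are a short blog post. It have a few grammar mistake."  # invented sample
highlighted_html, corrected = review_blog("Text", sample, "")

print(corrected)         # grammar-corrected text from vennify/t5-base-grammar-correction
print(highlighted_html)  # corrected text with any toxic sentences wrapped in red <span> tags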