JaishnaCodz committed on
Commit 2e49382 · verified · 1 Parent(s): e9d2c6d

Update app.py

Files changed (1)
  1. app.py +46 -59
app.py CHANGED
@@ -3,85 +3,72 @@ from transformers import pipeline
 from newspaper import Article
 import nltk
 from nltk.tokenize import sent_tokenize
-import re
 
-# Download punkt tokenizer for sentence splitting
-nltk.download('punkt')
+nltk.download("punkt")
 
-# Load grammar correction and toxicity detection models
+# Load models
 grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
 toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
 
+# Extract text from blog or URL
 def extract_text(input_type, text_input, url_input):
-    if input_type == "URL" and url_input:
-        try:
-            article = Article(url_input)
-            article.download()
-            article.parse()
-            return article.text
-        except Exception as e:
-            return f"Error extracting from URL: {str(e)}"
-    return text_input
-
-def check_grammar(text):
+    if input_type == "Text":
+        return text_input
     try:
-        corrected = grammar_corrector(text, max_length=512)[0]['generated_text']
-        return corrected
+        article = Article(url_input)
+        article.download()
+        article.parse()
+        return article.text
     except Exception as e:
-        return f"Error in grammar correction: {str(e)}"
-
-def detect_sensitive_content(text):
-    sentences = sent_tokenize(text)
-    sensitive = []
-    for i, sentence in enumerate(sentences):
-        result = toxicity_classifier(sentence)
-        if result[0]['label'] == 'toxic' and result[0]['score'] > 0.7:
-            sensitive.append({
-                "sentence": sentence,
-                "score": result[0]['score'],
-                "index": i
-            })
-    return sensitive
-
-def highlight_sensitive(text, sensitive_issues):
-    highlighted = text
-    for issue in sensitive_issues:
-        sentence = issue['sentence']
-        highlighted = highlighted.replace(sentence, f"<span style='background-color:red'>{sentence}</span>")
-    return highlighted
+        return f"Error fetching URL: {str(e)}"
 
+# Highlight grammar and toxic issues
 def review_blog(input_type, text_input, url_input):
     text = extract_text(input_type, text_input, url_input)
     if text.startswith("Error"):
-        return text, text
+        return text, "", []
 
-    corrected_text = check_grammar(text)
-    sensitive_issues = detect_sensitive_content(corrected_text)
-    highlighted = highlight_sensitive(corrected_text, sensitive_issues)
+    # Grammar correction
+    grammar_output = grammar_corrector(text, max_length=512)[0]["generated_text"]
+
+    # Toxic content detection
+    sentences = sent_tokenize(text)
+    toxic_sentences = []
+    for sent in sentences:
+        result = toxicity_classifier(sent)[0]
+        if result["label"] == "toxic" and result["score"] > 0.7:
+            toxic_sentences.append(sent)
+
+    # Highlight toxic sentences
+    highlighted = text
+    for sent in toxic_sentences:
+        highlighted = highlighted.replace(sent, f"<span style='background-color:red'>{sent}</span>")
 
-    return highlighted, corrected_text
+    return highlighted, grammar_output, toxic_sentences
 
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Blog Grammar & Toxicity Reviewer")
+    gr.Markdown("## 📝 Blog Review AI")
+    gr.Markdown("Checks for grammar & sensitive content (toxicity) in blog text or URL.")
 
-    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
-    text_input = gr.Textbox(label="Blog Text", lines=10, visible=True)
-    url_input = gr.Textbox(label="Blog URL", visible=False)
+    input_type = gr.Radio(["Text", "URL"], value="Text", label="Input Type")
+    text_input = gr.Textbox(label="Enter blog text", lines=10, visible=True)
+    url_input = gr.Textbox(label="Enter blog URL", visible=False)
 
-    def toggle_inputs(choice):
-        return {
-            text_input: gr.update(visible=choice == "Text"),
-            url_input: gr.update(visible=choice == "URL")
-        }
+    def toggle_input(t):
+        return {text_input: gr.update(visible=t == "Text"), url_input: gr.update(visible=t == "URL")}
 
-    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[text_input, url_input])
+    input_type.change(toggle_input, input_type, [text_input, url_input])
 
-    review_button = gr.Button("Review Blog")
-    highlighted_output = gr.HTML(label="Highlighted Output (Red = Sensitive)")
-    corrected_output = gr.Textbox(label="Corrected Text", lines=10)
+    review_btn = gr.Button("Review")
+    highlight_output = gr.HTML(label="Toxic Highlighted Text")
+    corrected_text = gr.Textbox(label="Grammar Corrected Text", lines=10)
+    toxic_list = gr.Textbox(label="Toxic Sentences Detected", lines=5)
 
-    review_button.click(fn=review_blog, inputs=[input_type, text_input, url_input], outputs=[highlighted_output, corrected_output])
+    review_btn.click(
+        review_blog,
+        inputs=[input_type, text_input, url_input],
+        outputs=[highlight_output, corrected_text, toxic_list]
+    )
 
-if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
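
The toxicity gate in the new review_blog keys off the output shape of the text-classification pipeline. Below is a minimal sketch of that check in isolation; the sample sentence and printed score are illustrative, not taken from the commit.

from transformers import pipeline

# unitary/toxic-bert is trained on the Jigsaw toxic-comment labels
# ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate');
# the default pipeline returns the top label with its confidence score.
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")

result = toxicity_classifier("You are an idiot.")[0]
print(result)  # e.g. {'label': 'toxic', 'score': 0.98}

# The app flags a sentence only when the top label is 'toxic' AND the score
# clears 0.7, so lower-confidence predictions pass through unhighlighted.
if result["label"] == "toxic" and result["score"] > 0.7:
    print("would be highlighted red in the app")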
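
Since review_blog now returns three values (the HTML with red highlights, the grammar-corrected text, and the list of flagged sentences), it can also be smoke-tested outside the Gradio UI. A hypothetical local check, assuming the functions from the new app.py are in scope; the input text is made up:

# Hypothetical smoke test; not part of the commit.
sample = "The weather are nice today. You are an idiot."
highlighted, corrected, toxic = review_blog("Text", sample, "")
print(corrected)   # corrected text from vennify/t5-base-grammar-correction
print(toxic)       # flagged sentences; gr.Textbox renders this list as a string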