JaishnaCodz committed on
Commit
6784902
·
verified ·
1 Parent(s): 3b87bdd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -44
app.py CHANGED
@@ -3,72 +3,79 @@ from transformers import pipeline
3
  from newspaper import Article
4
  import nltk
5
  from nltk.tokenize import sent_tokenize
 
6
 
7
- nltk.download("punkt")
8
 
9
- # Load models
10
  grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
11
  toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
12
 
13
- # Extract text from blog or URL
 
14
  def extract_text(input_type, text_input, url_input):
15
- if input_type == "Text":
16
- return text_input
17
- try:
18
  article = Article(url_input)
19
  article.download()
20
  article.parse()
21
  return article.text
22
- except Exception as e:
23
- return f"Error fetching URL: {str(e)}"
24
-
25
- # Highlight grammar and toxic issues
26
- def review_blog(input_type, text_input, url_input):
27
- text = extract_text(input_type, text_input, url_input)
28
- if text.startswith("Error"):
29
- return text, "", []
30
 
31
- # Grammar correction
32
- grammar_output = grammar_corrector(text, max_length=512)[0]["generated_text"]
 
33
 
34
- # Toxic content detection
35
  sentences = sent_tokenize(text)
36
- toxic_sentences = []
37
- for sent in sentences:
38
- result = toxicity_classifier(sent)[0]
39
- if result["label"] == "toxic" and result["score"] > 0.7:
40
- toxic_sentences.append(sent)
 
41
 
42
- # Highlight toxic sentences
43
- highlighted = text
44
- for sent in toxic_sentences:
45
- highlighted = highlighted.replace(sent, f"<span style='background-color:red'>{sent}</span>")
 
 
 
 
 
46
 
47
- return highlighted, grammar_output, toxic_sentences
 
 
 
 
 
 
 
48
 
49
  # Gradio UI
50
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
51
- gr.Markdown("## 📝 Blog Review AI")
52
- gr.Markdown("Checks for grammar & sensitive content (toxicity) in blog text or URL.")
 
 
53
 
54
- input_type = gr.Radio(["Text", "URL"], value="Text", label="Input Type")
55
- text_input = gr.Textbox(label="Enter blog text", lines=10, visible=True)
56
- url_input = gr.Textbox(label="Enter blog URL", visible=False)
57
 
58
- def toggle_input(t):
59
- return {text_input: gr.update(visible=t == "Text"), url_input: gr.update(visible=t == "URL")}
 
 
 
60
 
61
- input_type.change(toggle_input, input_type, [text_input, url_input])
62
 
63
- review_btn = gr.Button("Review")
64
- highlight_output = gr.HTML(label="Toxic Highlighted Text")
65
- corrected_text = gr.Textbox(label="Grammar Corrected Text", lines=10)
66
- toxic_list = gr.Textbox(label="Toxic Sentences Detected", lines=5)
67
 
68
- review_btn.click(
69
- review_blog,
70
- inputs=[input_type, text_input, url_input],
71
- outputs=[highlight_output, corrected_text, toxic_list]
72
- )
73
 
74
  demo.launch()
 
3
  from newspaper import Article
4
  import nltk
5
  from nltk.tokenize import sent_tokenize
6
+ import re
7
 
8
# sent_tokenize needs the Punkt sentence-splitting data. Newer NLTK releases
# look the tokenizer up under "punkt_tab" while older ones use "punkt", so
# request both identifiers.
# NOTE(review): nltk.download is expected to return False (not raise) for an
# identifier the installed NLTK does not know -- confirm on the pinned version.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource, quiet=True)

# Load grammar correction and toxicity detection models once at import time
# so every request reuses the same pipeline objects.
grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
13
 
14
+ # Functions
15
def extract_text(input_type, text_input, url_input):
    """Return the text that should be reviewed.

    Args:
        input_type: Selected source, "Text" or "URL".
        text_input: Raw blog text typed by the user (may be None or empty).
        url_input: Address of the blog post to fetch (may be None or empty).

    Returns:
        The parsed article text when ``input_type`` is "URL" and a non-blank
        URL was supplied; otherwise ``text_input`` (an empty string when it
        is None, so callers always receive a ``str``).

    Raises:
        Whatever ``Article.download`` / ``Article.parse`` raise when the
        page cannot be fetched or parsed; nothing is caught here.
    """
    # Pasted URLs frequently carry stray spaces or newlines; a blank URL is
    # treated the same as a missing one and falls back to the text box.
    url = (url_input or "").strip()
    if input_type == "URL" and url:
        article = Article(url)
        article.download()
        article.parse()
        return article.text
    return text_input or ""
 
 
 
 
 
 
 
23
 
24
def check_grammar(text):
    """Return a grammar-corrected version of ``text``.

    The text is corrected one sentence at a time. A single call over the
    whole post is capped at ``max_length=512`` generated tokens, so anything
    longer would come back cut off; per-sentence calls keep each generation
    well inside that limit.

    Args:
        text: Text to correct (may be None or empty).

    Returns:
        The corrected text. Sentences within a line are joined by a single
        space and the line breaks of the input are kept. Empty or
        whitespace-only input yields "" without calling the model.
    """
    if not text or not text.strip():
        return ""

    corrected_lines = []
    for line in text.splitlines():
        fixed_sentences = []
        for sentence in sent_tokenize(line):
            # The model card for this checkpoint feeds inputs prefixed with
            # "grammar: " -- NOTE(review): confirm against the model card.
            result = grammar_corrector(
                "grammar: " + sentence, max_length=512, do_sample=False
            )
            fixed_sentences.append(result[0]['generated_text'])
        corrected_lines.append(" ".join(fixed_sentences))
    return "\n".join(corrected_lines)
27
 
28
def detect_sensitive_content(text, threshold=0.7):
    """Find sentences that the toxicity classifier flags as toxic.

    Args:
        text: Text to scan (may be None or empty).
        threshold: A sentence is reported only when the classifier's top
            label is 'toxic' and its score is strictly greater than this
            value. Defaults to 0.7.

    Returns:
        A list of dicts, one per flagged sentence, with keys "sentence"
        (the sentence text), "score" (classifier score) and "index"
        (position of the sentence in the tokenized text). Empty or
        whitespace-only input yields [] without calling the model.
    """
    if not text or not text.strip():
        return []

    sensitive = []
    for index, sentence in enumerate(sent_tokenize(text)):
        # truncation=True is forwarded to the tokenizer so an unusually long
        # "sentence" is clipped to the model's input limit instead of failing.
        top = toxicity_classifier(sentence, truncation=True)[0]
        if top['label'] == 'toxic' and top['score'] > threshold:
            sensitive.append({"sentence": sentence, "score": top['score'], "index": index})
    return sensitive
36
 
37
def highlight_text(original, corrected, sensitive_issues):
    """Render ``corrected`` as HTML with grammar and toxicity highlights.

    Words of ``corrected`` that differ from ``original`` get a yellow
    background; every occurrence of a flagged sentence gets a red
    background (yellow spans nest inside red ones).

    Compared with plain ``str.replace`` highlighting this version:
      * escapes the text, so markup in the reviewed blog is shown literally
        instead of being injected into the page;
      * marks character ranges rather than rewriting the string, so a
        changed word such as "red" or "span" can never corrupt the
        highlight markup or match inside another word;
      * aligns words with ``difflib.SequenceMatcher`` so one inserted or
        deleted word does not mark everything after it as changed.

    Args:
        original: Text before grammar correction (None is treated as "").
        corrected: Text after grammar correction (None is treated as "").
        sensitive_issues: Iterable of dicts carrying a "sentence" key, as
            produced by ``detect_sensitive_content``; None is treated as
            empty.

    Returns:
        An HTML string containing the escaped corrected text and the
        highlight ``<span>`` elements.
    """
    import difflib
    import html
    import itertools
    import re

    original = original or ""
    corrected = corrected or ""
    size = len(corrected)

    # Character span of every whitespace-delimited word in the corrected text.
    word_spans = [match.span() for match in re.finditer(r"\S+", corrected)]
    new_words = [corrected[start:end] for start, end in word_spans]

    # changed[i] is True when character i belongs to a replaced/inserted word.
    # autojunk is disabled so frequent words in long posts are still aligned.
    changed = [False] * size
    matcher = difflib.SequenceMatcher(None, original.split(), new_words, autojunk=False)
    for tag, _i1, _i2, j1, j2 in matcher.get_opcodes():
        if tag in ("replace", "insert"):
            for start, end in word_spans[j1:j2]:
                changed[start:end] = [True] * (end - start)

    # toxic[i] is True when character i lies inside a flagged sentence.
    toxic = [False] * size
    for issue in sensitive_issues or ():
        sentence = issue['sentence']
        if not sentence:
            continue  # an empty needle would match at every position
        start = corrected.find(sentence)
        while start != -1:
            end = start + len(sentence)
            toxic[start:end] = [True] * (end - start)
            start = corrected.find(sentence, end)

    # Emit maximal runs: the outer grouping decides the red wrapper, the
    # inner grouping the yellow one, so spans are always properly nested.
    pieces = []
    for is_toxic, toxic_run in itertools.groupby(range(size), key=toxic.__getitem__):
        inner = []
        for is_changed, run in itertools.groupby(toxic_run, key=changed.__getitem__):
            positions = list(run)
            segment = html.escape(corrected[positions[0]:positions[-1] + 1])
            if is_changed:
                segment = f"<span style='background-color: yellow'>{segment}</span>"
            inner.append(segment)
        chunk = "".join(inner)
        if is_toxic:
            chunk = f"<span style='background-color: red'>{chunk}</span>"
        pieces.append(chunk)
    return "".join(pieces)
46
 
47
def review_blog(input_type, text_input, url_input):
    """Run the full review pipeline for the Gradio "Review" button.

    Steps: obtain the raw text (typed or fetched from the URL), correct its
    grammar, detect toxic sentences in the corrected text, and build the
    highlighted HTML.

    Args:
        input_type: Selected source, "Text" or "URL".
        text_input: Blog text typed by the user (may be None or empty).
        url_input: Blog URL entered by the user (may be None or empty).

    Returns:
        A ``(html, corrected_text)`` tuple. When there is nothing to review
        or the URL cannot be fetched, ``html`` carries a short message and
        ``corrected_text`` is "".
    """
    import html

    if not (text_input or "").strip() and not (url_input or "").strip():
        return "Please provide input text or a URL.", ""

    try:
        raw_text = extract_text(input_type, text_input, url_input)
    except Exception as exc:
        # UI boundary: extraction only raises while fetching/parsing the URL.
        # Show the failure to the user instead of an unhandled error; the
        # message is escaped because it is rendered as HTML.
        return html.escape(f"Error fetching URL: {exc}"), ""

    # The selected source can still be empty (e.g. "Text" chosen while only
    # the hidden URL box holds a value, or a page with no article text);
    # skip the models rather than feeding them an empty string.
    if not raw_text or not raw_text.strip():
        return "No text found to review.", ""

    corrected = check_grammar(raw_text)
    sensitive = detect_sensitive_content(corrected)
    highlighted = highlight_text(raw_text, corrected, sensitive)
    return highlighted, corrected
55
 
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Title and colour legend for the highlighted output.
    gr.Markdown("# 🖊️ AI Blog Reviewer")
    gr.Markdown(
        "Highlights:\n"
        "- <span style='background-color: yellow'>**Yellow:** Grammar corrections</span><br>\n"
        "- <span style='background-color: red'>**Red:** Sensitive or toxic content</span>",
        elem_id="legend",
    )

    # Source selector plus one input box per source; only the box matching
    # the selected source is visible.
    input_type = gr.Radio(choices=["Text", "URL"], value="Text", label="Input Type")
    text_input = gr.Textbox(lines=10, label="Blog Text", visible=True)
    url_input = gr.Textbox(label="Blog URL", visible=False)

    def toggle_input(choice):
        """Return visibility updates so only the chosen input box is shown."""
        show_text = choice == "Text"
        show_url = choice == "URL"
        return {
            text_input: gr.update(visible=show_text),
            url_input: gr.update(visible=show_url),
        }

    input_type.change(
        fn=toggle_input,
        inputs=input_type,
        outputs=[text_input, url_input],
    )

    # Trigger and result widgets: highlighted HTML and the corrected text.
    review_btn = gr.Button("Review Blog")
    html_output = gr.HTML(label="Highlighted Output")
    final_output = gr.Textbox(label="Corrected Blog", lines=10)

    review_btn.click(
        fn=review_blog,
        inputs=[input_type, text_input, url_input],
        outputs=[html_output, final_output],
    )

demo.launch()