JaishnaCodz committed on
Commit
7d19342
·
verified ·
1 Parent(s): b8a6b71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -64
app.py CHANGED
@@ -1,84 +1,105 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
3
  from nltk import download, sent_tokenize
 
 
4
  import re
5
 
6
- # Download necessary NLTK models
7
  download('punkt')
8
- download('punkt_tab') # Fixes the recent error with PunktTokenizer
9
-
10
- # Load the grammar correction model (T5 based)
11
- grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
12
-
13
- # Define sensitive/toxic keyword patterns (you can extend this list)
14
- sensitive_keywords = [
15
- r"\bhate\b", r"\bstupid\b", r"\bidiot\b", r"\btrash\b", r"\bkill\b",
16
- r"\bnot allowed\b", r"\bnobody cares\b", r"\bterrorist\b", r"\bgo back\b",
17
- r"\bimmigrants\b", r"\bslur\b", r"\bdisgusting\b"
18
- ]
19
-
20
- def detect_sensitive_content(text):
21
- # Tokenize into sentences
22
- sentences = sent_tokenize(text)
23
- highlighted = []
24
- for sentence in sentences:
25
- flagged = False
26
- for pattern in sensitive_keywords:
27
- if re.search(pattern, sentence, re.IGNORECASE):
28
- flagged = True
29
- break
30
- if flagged:
31
- sentence = f"<span style='color: red'>{sentence}</span>"
32
- highlighted.append(sentence)
33
- return " ".join(highlighted)
34
-
35
- def highlight_grammar(original, corrected):
36
- original_words = original.split()
37
- corrected_words = corrected.split()
38
- highlighted = []
39
-
40
- for orig, corr in zip(original_words, corrected_words):
41
- if orig != corr:
42
- highlighted.append(f"<span style='color: yellow'>{corr}</span>")
43
- else:
44
- highlighted.append(corr)
45
-
46
- # Add remaining corrected words
47
- if len(corrected_words) > len(original_words):
48
- for word in corrected_words[len(original_words):]:
49
- highlighted.append(f"<span style='color: yellow'>{word}</span>")
50
-
51
- return " ".join(highlighted)
 
 
 
52
 
53
  def review_blog(input_text, input_type):
 
54
  if input_type == "URL":
55
- return "URL support is under development."
 
 
56
 
57
- # Step 1: Grammar correction
58
- corrected_output = grammar_corrector(input_text, max_length=512, do_sample=False)[0]["generated_text"]
 
59
 
60
- # Step 2: Highlight grammar issues
61
- grammar_highlighted = highlight_grammar(input_text, corrected_output)
 
 
 
 
 
 
62
 
63
- # Step 3: Highlight sensitive content
64
- sensitive_highlighted = detect_sensitive_content(corrected_output)
65
-
66
- return gr.update(value=grammar_highlighted), gr.update(value=sensitive_highlighted)
67
 
68
  # Gradio UI
69
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
70
- gr.Markdown("πŸ“ **AI Blog Reviewer**")
71
- gr.Markdown("#### Highlights:\n- <span style='color: yellow'>Yellow</span>: Grammar corrections\n- <span style='color: red'>Red</span>: Sensitive or toxic content")
72
 
73
  input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
74
- blog_input = gr.Textbox(lines=8, label="Blog Text", placeholder="Paste your blog content here...")
75
  review_btn = gr.Button("Review Blog")
76
 
77
- gr.Markdown("#### πŸ”§ Grammar Corrections:")
78
- grammar_output = gr.HTML()
79
- gr.Markdown("#### 🚨 Sensitive/Toxic Content:")
80
- sensitive_output = gr.HTML()
 
 
 
 
81
 
82
- review_btn.click(review_blog, inputs=[blog_input, input_type], outputs=[grammar_output, sensitive_output])
 
 
 
 
83
 
84
- demo.launch()
 
1
import os
import re
import tempfile

import gradio as gr
import requests
from bs4 import BeautifulSoup
from nltk import download, sent_tokenize
import google.generativeai as genai
8
 
9
# Download NLTK data (punkt_tab is required by recent NLTK sentence tokenizers)
download('punkt')
download('punkt_tab')

# Configure Gemini API using Hugging Face Spaces secrets.
# Fail fast at startup if the key is absent rather than at first request.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in Hugging Face Spaces secrets.")
genai.configure(api_key=api_key)
# BUG FIX: 'gemini-2.5' is not a published Gemini model identifier and the
# API rejects it; a concrete model id such as 'gemini-2.5-flash' is required.
model = genai.GenerativeModel('gemini-2.5-flash')
19
+
20
# Prompt for Gemini to analyze text.
# The model is instructed to emit a fixed four-section markdown report;
# any ```markdown fences it wraps around the output are stripped later
# before rendering. NOTE: this string is sent to the model verbatim — do
# not reword it without re-checking the report parsing.
PROMPT = """
You are an AI content reviewer. Analyze the provided text for the following:
1. **Grammar Issues**: Identify and suggest corrections for grammatical errors.
2. **Legal Policy Violations**: Flag content that may violate common legal policies (e.g., copyright infringement, defamation, incitement to violence).
3. **Crude/Abusive Language**: Detect crude, offensive, or abusive language.
4. **Sensitive Topics**: Identify content related to sensitive topics such as racism, gender bias, or other forms of discrimination.

Return the results in the following markdown format:
```markdown
# Blog Review Report

## Grammar Corrections
- [List grammar issues and suggested corrections]

## Legal Policy Violations
- [List any potential legal violations or "None detected"]

## Crude/Abusive Language
- [List instances of crude or abusive language or "None detected"]

## Sensitive Topics
- [List instances of sensitive topics or "None detected"]
```

For each issue, provide the original sentence, the issue, and the suggested correction or explanation. Be precise and concise.
"""
47
+
48
def fetch_url_content(url):
    """Fetch a web page and return its readable text.

    Returns the extracted text on success, the sentinel
    "No readable content found on the page." when nothing is extracted,
    or a string starting with "Error" on failure — callers test that
    prefix, so it must be preserved.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # BUG FIX: find_all(['p', 'article', 'div']) duplicated content,
        # because text of <p> tags nested inside a matched <div>/<article>
        # was collected once per enclosing match. Extract paragraphs only,
        # falling back to the whole page's text when there are no <p> tags.
        paragraphs = [p.get_text(strip=True) for p in soup.find_all('p')]
        content = ' '.join(part for part in paragraphs if part)
        if not content:
            content = soup.get_text(separator=' ', strip=True)
        return content if content else "No readable content found on the page."
    except Exception as e:
        # Best-effort fetch: report the failure as a string (checked by prefix)
        # instead of raising, so the UI can surface it.
        return f"Error fetching URL: {str(e)}"
58
 
59
def review_blog(input_text, input_type):
    """Analyze blog text (or the content behind a URL) with Gemini.

    Returns a ``(report_markdown, report_file_path)`` pair matching the
    ``[report_output, download_btn]`` outputs of the Review button. The
    second value is a path to a temporary ``.md`` file, or None when the
    analysis failed.
    """
    # Resolve URL input to page text first; fetch errors come back as
    # strings starting with "Error" (contract of fetch_url_content).
    if input_type == "URL":
        input_text = fetch_url_content(input_text)
        if input_text.startswith("Error"):
            return input_text, None

    # Normalize the text to one sentence per line before sending it to the model.
    sentences = sent_tokenize(input_text)
    analysis_text = "\n".join(sentences)

    # Query Gemini with the review prompt.
    try:
        response = model.generate_content(PROMPT + "\n\nText to analyze:\n" + analysis_text)
        report = response.text.strip()
        # Strip any ```markdown fences the model wraps around the report.
        report = re.sub(r'^```markdown\n|```$', '', report, flags=re.MULTILINE)
    except Exception as e:
        # BUG FIX: the original fed the error string into the gr.File output too;
        # return None so the file component stays empty on failure.
        return f"Error analyzing content with Gemini: {str(e)}", None

    # BUG FIX: the original returned the raw report text as the second value,
    # but that output feeds a gr.File component, which requires a file path.
    # Persist the report to a temp .md file and hand back its path.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".md", delete=False, prefix="blog_review_report_"
    ) as fh:
        fh.write(report)
    return report, fh.name
 
 
 
80
 
81
# Gradio UI
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    # NOTE(review): emoji literals repaired from mojibake ("πŸ“" was UTF-8
    # "📝" mis-decoded); visible text is otherwise unchanged.
    gr.Markdown("# 📝 AI Blog Reviewer")
    gr.Markdown("Enter blog text or a URL to review for grammar, legal issues, crude language, and sensitive topics. The report is generated in markdown format.")

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    blog_input = gr.Textbox(lines=8, label="Blog Content or URL", placeholder="Paste your blog text or URL here...")
    review_btn = gr.Button("Review Blog")

    gr.Markdown("### 📄 Review Report")
    report_output = gr.Markdown()
    download_btn = gr.File(label="Download Report", visible=False)

    # BUG FIX: the original defined a handle_review_output() helper that was
    # never wired to any event (dead code), and whose
    # gr.update(value={"content": ..., "filename": ...}) passed a dict where
    # gr.File expects a file path. The click handler feeds
    # [report_output, download_btn] directly, so the helper is removed.
    review_btn.click(
        review_blog,
        inputs=[blog_input, input_type],
        outputs=[report_output, download_btn]
    )

demo.launch()