Spaces:
Running
Running
File size: 7,748 Bytes
768c740 7d19342 b8a6b71 7d19342 6784902 20bfd0b 56f7cbb 7d19342 b8a6b71 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 b8a6b71 20bfd0b b8a6b71 20bfd0b 7d19342 20bfd0b 768c740 7d19342 768c740 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 7d19342 20bfd0b 768c740 20bfd0b b8a6b71 20bfd0b 7d19342 b8a6b71 20bfd0b dbbf7cc 7d19342 20bfd0b 7d19342 768c740 7d19342 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
from nltk import download, sent_tokenize
import google.generativeai as genai
import os
import re
import tempfile
import asyncio
import time
# Fetch the NLTK tokenizer data packages at startup; sent_tokenize is used
# later in this file to split the input into sentences.
for _nltk_resource in ('punkt', 'punkt_tab'):
    download(_nltk_resource)
# Configure the Gemini client from the GEMINI_API_KEY environment variable.
# A missing or blank key aborts start-up immediately, so the failure is visible
# at launch rather than on the first review request.
api_key = os.environ.get("GEMINI_API_KEY", "").strip()
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in your environment.")
genai.configure(api_key=api_key)

# Use gemini-1.5-flash for faster and more accessible text analysis
try:
    model = genai.GenerativeModel('gemini-1.5-flash')
except Exception as e:
    # Debugging aid: show which models this key can access.
    print(f"Error initializing model: {str(e)}")
    print("Available models:")
    try:
        for m in genai.list_models():
            print(m.name)
    except Exception as listing_error:
        # Listing is best-effort; it must not replace the ValueError below.
        print(f"(could not list models: {listing_error})")
    raise ValueError(
        "Failed to initialize gemini-1.5-flash. Check available models above and update the model name."
    ) from e
# Instruction text sent to Gemini ahead of the blog content (review_blog
# concatenates this prompt, a "Text to analyze:" marker, and the text).
# It asks for four checks -- grammar, legal policy violations, crude/abusive
# language, and sensitive topics -- and spells out the Markdown layout the
# report should follow, since the response is rendered directly as Markdown.
PROMPT = """
You are an AI content reviewer. Analyze the provided text for the following:
1. Grammar Issues: Identify and suggest corrections for grammatical errors.
2. Legal Policy Violations: Flag content that may violate common legal policies (e.g., copyright infringement, defamation, incitement to violence).
3. Crude/Abusive Language: Detect crude, offensive, or abusive language.
4. Sensitive Topics: Identify content related to sensitive topics such as racism, gender bias, or other forms of discrimination.
Return the results in the following markdown format:
# Blog Review Report
## Grammar Corrections
1. [Heading of issue]
- CONTENT: [Exact line or part of text with the issue]
- SUGGESTION: [Suggested correction]
- ISSUE: [Description of the issue]
2. [Heading of next issue]
- CONTENT: [Exact line or part of text with the issue]
- SUGGESTION: [Suggested correction]
- ISSUE: [Description of the issue]
[Continue numbering for additional issues or state "None detected"]
## Legal Policy Violations
- CONTENT: [Exact line or part of text with the issue]
SUGGESTION: [Suggested action or correction]
ISSUE: [Description of the legal violation]
[Or state "None detected"]
## Crude/Abusive Language
- [List instances of crude or abusive language or "None detected"]
## Sensitive Topics
- [List instances of sensitive topics or "None detected"]
For each issue, provide the exact text, a suggested correction or action, and a concise explanation. Be precise and ensure the output strictly follows the specified format.
"""
def _download_and_extract(url):
    """Download *url* synchronously and return its readable text ('' if none)."""
    content_tags = ['p', 'article', 'div']
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Keep only the outermost matching tags: get_text() already includes the
    # text of every descendant, so also visiting nested <p>/<div> elements
    # would repeat the same sentences several times.
    outermost = [
        tag for tag in soup.find_all(content_tags)
        if tag.find_parent(content_tags) is None
    ]
    # separator=' ' keeps words from adjacent child elements from fusing.
    pieces = (tag.get_text(separator=' ', strip=True) for tag in outermost)
    return ' '.join(piece for piece in pieces if piece)


async def fetch_url_content(url):
    """Fetch a web page and return the text found in its content tags.

    The blocking download and HTML parsing run in a worker thread so the
    event loop stays responsive while the request is in flight.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted text; "No readable content found on the page." when
        nothing could be extracted; or a string starting with
        "Error fetching URL:" when the request or parsing fails. This
        function reports failures through its return value and does not
        raise.
    """
    try:
        content = await asyncio.to_thread(_download_and_extract, url)
        return content if content else "No readable content found on the page."
    except Exception as e:
        # Boundary handler: callers expect an error string, not an exception.
        return f"Error fetching URL: {str(e)}"
# Upper bound, in seconds, on how long to wait for the Gemini response.
_GEMINI_TIMEOUT_SECONDS = 30

# Matches a report that is wrapped as a whole in one Markdown code fence,
# optionally tagged "markdown"/"md"; group 1 is the text inside the fence.
_FENCED_REPORT = re.compile(
    r'```[ \t]*(?:markdown|md)?[ \t]*\r?\n(.*?)\r?\n?[ \t]*```',
    re.DOTALL | re.IGNORECASE,
)


def _strip_code_fences(report):
    """Return *report* without a code fence that wraps the entire text."""
    text = report.strip()
    wrapped = _FENCED_REPORT.fullmatch(text)
    # Fences are removed only as a matching pair around the whole report, so
    # code blocks that legitimately appear inside the report are left alone.
    return wrapped.group(1).strip() if wrapped else text


async def review_blog(text_input, url_input, progress=gr.Progress()):
    """Review blog text, or the page behind a URL, and build a Markdown report.

    Exactly one of *text_input* / *url_input* must be non-blank.

    Args:
        text_input: Blog text pasted by the user (may be empty or None).
        url_input: URL of a blog page to fetch (may be empty or None).
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A 3-tuple matching the click handler's outputs: the button label
        ("Review Blog"), the Markdown report or an "Error..." message, and a
        ``gr.update`` for the download component (visible with the report
        file path on success, hidden otherwise).
    """
    text_value = (text_input or "").strip()
    url_value = (url_input or "").strip()

    # Validate input: exactly one of the two fields must be filled in.
    if text_value and url_value:
        return "Review Blog", "Error: Please provide input in either the Text or URL tab, but not both.", gr.update(visible=False)
    if not text_value and not url_value:
        return "Review Blog", "Error: No input provided.", gr.update(visible=False)

    if url_value:
        progress(0, desc="Fetching content...")
        input_text = await fetch_url_content(url_value)
        # fetch_url_content reports failures as strings. Match its full
        # prefix so a page whose text merely begins with "Error" is reviewed.
        if input_text.startswith("Error fetching URL:"):
            return "Review Blog", input_text, gr.update(visible=False)
        # An empty page is a failure too; do not send the placeholder
        # sentence to the model as if it were blog content.
        if input_text == "No readable content found on the page.":
            return "Review Blog", f"Error: {input_text}", gr.update(visible=False)
    else:
        input_text = text_value

    # One sentence per line for the model.
    analysis_text = "\n".join(sent_tokenize(input_text))
    prompt = PROMPT + "\n\nText to analyze:\n" + analysis_text

    # Run the blocking Gemini call in a worker thread and poll it once per
    # second, so the progress bar moves while the request is in flight and
    # the timeout bounds the real waiting time.
    progress(0, desc="Generating report...")
    request = asyncio.create_task(asyncio.to_thread(model.generate_content, prompt))
    started = time.monotonic()
    while True:
        finished, _ = await asyncio.wait({request}, timeout=1)
        if finished:
            break
        elapsed = time.monotonic() - started
        if elapsed > _GEMINI_TIMEOUT_SECONDS:
            # Cancelling stops the wait; the worker thread itself cannot be
            # interrupted and finishes in the background.
            request.cancel()
            return "Review Blog", f"Error: API request timed out after {_GEMINI_TIMEOUT_SECONDS} seconds.", gr.update(visible=False)
        # Elapsed time is only an estimate of progress; cap it below 100%.
        fraction = min(elapsed / _GEMINI_TIMEOUT_SECONDS, 0.95)
        progress(fraction, desc=f"Generating report... ({int(fraction * 100)}%)")

    try:
        response = request.result()
        # The report is rendered as Markdown, so drop any wrapping code fence.
        report = _strip_code_fences(response.text)
    except Exception as e:
        report = f"Error analyzing content with Gemini: {str(e)}"
        # Debugging aid: list the models available to this API key.
        print("Available models:")
        try:
            for m in genai.list_models():
                print(m.name)
        except Exception as listing_error:
            # Best-effort only; the user still gets the original error.
            print(f"(could not list models: {listing_error})")
        return "Review Blog", report, gr.update(visible=False)

    # Persist the report so the File component can offer it for download.
    # delete=False is required: the file must outlive this function.
    try:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
            temp_file.write(report)
            temp_file_path = temp_file.name
    except (OSError, UnicodeError) as e:
        return "Review Blog", f"Error creating temporary file: {str(e)}", gr.update(visible=False)

    progress(1.0, desc="Report generated!")
    return "Review Blog", report, gr.update(visible=True, value=temp_file_path)
# Custom CSS for the Gradio page: Inter font, review-button hover and
# disabled/loading states, tab and input fonts, and progress bar colours.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

.gradio-container {
    font-family: 'Inter', sans-serif !important;
}

.review-btn {
    transition: all 0.3s ease;
    font-weight: 500;
    background-color: #2c3e50;
    color: white;
    border-radius: 8px;
    padding: 10px 20px;
}

.review-btn:hover {
    background-color: #4CAF50;
    color: white;
    transform: scale(1.05);
}

.review-btn:disabled {
    opacity: 0.7;
    cursor: not-allowed;
}

/* Hourglass shown after the label while the button is disabled. */
.review-btn:disabled::after {
    content: ' ⏳';
}

.tab-nav button {
    font-family: 'Inter', sans-serif;
    font-weight: 500;
}

input, textarea {
    font-family: 'Inter', sans-serif;
}

.gr-progress {
    background-color: #e0e0e0;
    border-radius: 8px;
    overflow: hidden;
}

.gr-progress > div {
    background-color: #4CAF50;
    height: 20px;
    transition: width 0.3s ease;
}
"""
# Gradio UI: a Text tab and a URL tab feed a single "Review Blog" button.
# The report is rendered as Markdown and offered as a downloadable file.
# NOTE(review): the emoji in the two headings replace characters that were
# mis-encoded in this file; the original glyphs could not be recovered, so
# confirm the intended icons.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
    gr.Markdown("# 📝 AI Blog Reviewer")
    gr.Markdown("Enter blog text or a URL to review for grammar, legal issues, crude language, and sensitive topics. The report is generated in markdown format.")

    with gr.Tabs():
        with gr.TabItem("Text"):
            text_input = gr.Textbox(lines=8, label="Blog Content", placeholder="Paste your blog text here...")
        with gr.TabItem("URL"):
            url_input = gr.Textbox(lines=1, label="Blog URL", placeholder="Enter the blog URL here...")

    status_button = gr.Button(value="Review Blog", elem_classes=["review-btn"])

    gr.Markdown("### 📋 Review Report")
    report_output = gr.Markdown()
    # Hidden until a report file exists; review_blog makes it visible.
    download_btn = gr.File(label="Download Report", visible=False)

    # Bind the review button to process inputs
    status_button.click(
        fn=review_blog,
        inputs=[text_input, url_input],
        outputs=[status_button, report_output, download_btn],
    )

# Launch only when run as a script, so importing this module (for tests or
# tooling) does not start a web server.
if __name__ == "__main__":
    demo.launch()