# Hugging Face Spaces page header captured with the file (Space status: Running).
# It is not part of the program.
import asyncio
import os
import re
import tempfile

import google.generativeai as genai
import gradio as gr
import requests
from bs4 import BeautifulSoup
from nltk import download, sent_tokenize
# Download NLTK data
download('punkt')
download('punkt_tab')

# Configure Gemini API using environment variable.
# Fail fast at startup: nothing in this app works without the key.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in environment variables. Please set it in your environment.")
genai.configure(api_key=api_key)

# Use gemini-1.5-flash for faster text analysis
try:
    model = genai.GenerativeModel('gemini-1.5-flash')
except Exception as e:
    print(f"Error initializing model: {e}")
    print("Available models:")
    try:
        for m in genai.list_models():
            print(m.name)
    except Exception as list_error:
        # Listing models is only a diagnostic aid; if it fails too, still
        # raise the initialization error below instead of this one.
        print(f"(could not list models: {list_error})")
    raise ValueError("Failed to initialize gemini-1.5-flash. Check available models above and update the model name.") from e
# Prompt for Gemini to analyze text.
# The text to review is appended after this template at call time. Sub-bullets
# are indented under their numbered item so the requested markdown renders as
# one nested list rather than several disconnected lists.
PROMPT = """
You are an AI content reviewer. Analyze the provided text for the following:
1. *Grammar Issues*: Identify and suggest corrections for grammatical errors.
2. *Legal Policy Violations*: Flag content that may violate common legal policies (e.g., copyright infringement, defamation, incitement to violence).
3. *Crude/Abusive Language*: Detect crude, offensive, or abusive language.
4. *Sensitive Topics*: Identify content related to sensitive topics such as racism, gender bias, or other forms of discrimination.
Return the results in the following markdown format:
# Blog Review Report
## Grammar Corrections
1. [Heading of issue]
   - CONTENT: [Exact line or part of text with the issue]
   - SUGGESTION: [Suggested correction]
   - ISSUE: [Description of the issue]
2. [Heading of next issue]
   - CONTENT: [Exact line or part of text with the issue]
   - SUGGESTION: [Suggested correction]
   - ISSUE: [Description of the issue]
[Continue numbering for additional issues or state "None detected"]
## Legal Policy Violations
- CONTENT: [Exact line or part of text with the issue]
  SUGGESTION: [Suggested action or correction]
  ISSUE: [Description of the legal violation]
[Or state "None detected"]
## Crude/Abusive Language
- [List instances of crude or abusive language or "None detected"]
## Sensitive Topics
- [List instances of sensitive topics or "None detected"]
For each issue, provide the exact text, a suggested correction or action, and a concise explanation. Be precise and ensure the output strictly follows the specified format.
"""
async def fetch_url_content(url):
    """Download *url* and return the readable text of the page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text as a single space-separated string. If the page yields
        no text, returns "No readable content found on the page."; if anything
        fails, returns a message starting with "Error fetching URL:". This
        function never raises; failures are reported through the return value.
    """
    try:
        # requests is blocking: run it in a worker thread so the event loop
        # (and any asyncio timeout wrapped around this coroutine) stays live.
        response = await asyncio.to_thread(requests.get, url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        container_tags = ['p', 'article', 'div']
        # Keep only the outermost matches. A <div> already contains the text of
        # every <p>/<div> nested inside it, so collecting all matches would
        # repeat the same text once per nesting level.
        chunks = [
            tag.get_text(' ', strip=True)
            for tag in soup.find_all(container_tags)
            if tag.find_parent(container_tags) is None
        ]
        # Drop empty chunks so a page of empty containers is reported as
        # having no readable content instead of returning bare whitespace.
        content = ' '.join(chunk for chunk in chunks if chunk)
        return content if content else "No readable content found on the page."
    except Exception as e:
        return f"Error fetching URL: {e}"
def _strip_outer_markdown_fence(text):
    """Return *text* without a ``` / ```markdown fence that wraps all of it.

    Only a fence enclosing the entire text is removed; code blocks inside the
    report keep their own fences.
    """
    wrapped = re.fullmatch(r"```(?:markdown|md)?[ \t]*\n(.*?)\n?```", text, flags=re.DOTALL)
    return wrapped.group(1).strip() if wrapped else text


async def _run_review(text_input, url_input):
    """Fetch (if needed), analyze and package one review; returns the UI triple."""
    if url_input:
        content = await fetch_url_content(url_input)
        # fetch_url_content reports failures through its return value; show
        # those messages to the user instead of sending them to Gemini as if
        # they were blog text.
        if content.startswith(("Error fetching URL:", "No readable content found")):
            return "Review Blog", content, gr.update(visible=False)
    else:
        content = text_input

    # Tokenize input for analysis: one sentence per line.
    analysis_text = "\n".join(sent_tokenize(content))

    try:
        response = await asyncio.to_thread(
            model.generate_content,
            PROMPT + "\n\nText to analyze:\n" + analysis_text,
        )
        report = _strip_outer_markdown_fence(response.text.strip())
    except Exception as e:
        message = f"Error analyzing content with Gemini: {e}. Please check your API key, network connection, or model availability."
        print("Available models:")
        try:
            # Diagnostic only. Run it off the event loop, and never let a
            # failure here replace the real error message.
            names = await asyncio.to_thread(lambda: [m.name for m in genai.list_models()])
            for name in names:
                print(name)
        except Exception as list_error:
            print(f"(could not list models: {list_error})")
        return "Review Blog", message, gr.update(visible=False)

    # Create a temporary file to store the report. delete=False keeps the file
    # on disk after the `with` block so its path can be handed to the UI.
    try:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as temp_file:
            temp_file.write(report)
            download_path = temp_file.name
    except Exception as e:
        return "Review Blog", f"Error creating temporary file: {e}", gr.update(visible=False)

    report = f"**Report generated, please scroll down to view.**\n\n{report}"
    return "Review Blog", report, gr.update(visible=True, value=download_path)


async def review_blog(text_input, url_input):
    """Review blog text, or the page behind a URL, and build a markdown report.

    Exactly one of the two inputs must be non-blank.

    Args:
        text_input: Blog text pasted by the user (may be empty or None).
        url_input: URL of a blog page (may be empty or None).

    Returns:
        A 3-tuple ``(button_label, report_markdown, file_update)``: the label
        for the review button, the report or an "Error: ..." message, and a
        ``gr.update`` that shows the download file on success and hides it on
        failure.
    """
    # Whitespace-only input counts as empty.
    text_input = (text_input or "").strip()
    url_input = (url_input or "").strip()

    if not text_input and not url_input:
        return "Review Blog", "Error: No input provided.", gr.update(visible=False)
    if text_input and url_input:
        return "Review Blog", "Error: Please provide input in either the Text or URL tab, but not both.", gr.update(visible=False)

    try:
        # Bound the whole fetch + analysis pipeline.
        return await asyncio.wait_for(_run_review(text_input, url_input), timeout=30)
    except asyncio.TimeoutError:
        return "Review Blog", "Error: Process timed out after 30 seconds.", gr.update(visible=False)
    except Exception as e:
        return "Review Blog", f"Unexpected error: {e}", gr.update(visible=False)
# Custom CSS for hover effect, loading state, and Inter font.
# .review-btn is the class given to the review button; its :disabled state
# draws a small spinner in front of the label.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
.gradio-container {
    font-family: 'Inter', sans-serif !important;
}
.review-btn {
    transition: all 0.3s ease;
    font-weight: 500;
    background-color: #2c3e50;
    color: white;
    border-radius: 8px;
    padding: 10px 20px;
    position: relative;
}
.review-btn:hover {
    background-color: #4CAF50;
    color: white;
    transform: scale(1.05);
}
.review-btn:disabled {
    opacity: 0.7;
    cursor: not-allowed;
}
.review-btn:disabled::before {
    content: '';
    display: inline-block;
    width: 16px;
    height: 16px;
    border: 2px solid #fff;
    border-radius: 50%;
    border-top-color: transparent;
    animation: spin 1s linear infinite;
    margin-right: 8px;
    vertical-align: middle;
}
@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}
.tab-nav button {
    font-family: 'Inter', sans-serif;
    font-weight: 500;
}
input, textarea {
    font-family: 'Inter', sans-serif;
}
"""
# Gradio UI with Tabs.
# NOTE(review): the two heading emoji were mis-decoded in the captured source
# (they showed up as a lone Greek "pi"); the glyphs below are stand-ins.
# Confirm against the intended design.
with gr.Blocks(theme=gr.themes.Monochrome(), css=custom_css) as demo:
    gr.Markdown("# 📝 AI Blog Reviewer")
    gr.Markdown("Enter blog text or a URL to review for grammar, legal issues, crude language, and sensitive topics. The report is generated in markdown format.")

    # One tab per input mode; review_blog expects exactly one to be filled in.
    with gr.Tabs():
        with gr.TabItem("Text"):
            text_input = gr.Textbox(lines=8, label="Blog Content", placeholder="Paste your blog text here...")
        with gr.TabItem("URL"):
            url_input = gr.Textbox(lines=1, label="Blog URL", placeholder="Enter the blog URL here...")

    status_button = gr.Button(value="Review Blog", elem_classes=["review-btn"])
    gr.Markdown("### 📋 Review Report")
    report_output = gr.Markdown()
    # Hidden until a report file exists to download.
    download_btn = gr.File(label="Download Report", visible=False)

    # Bind the review button to process inputs. The three outputs line up with
    # the (button label, report markdown, file update) tuple from review_blog.
    status_button.click(
        fn=review_blog,
        inputs=[text_input, url_input],
        outputs=[status_button, report_output, download_btn],
    )

demo.launch()