# Source: Hugging Face Space "Ashokdll/hate-speech-detector-app" (status: Sleeping)
# File size: 34,123 bytes

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import numpy as np
import json
from datetime import datetime
import logging
import os

# Set up logging: configure the root logger to emit INFO and above, and create
# the module-level logger used by the agent system and the helpers in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PromptBasedMultiAgentSystem:
    def __init__(self):
        """Reset the agent slots, load both prompt configurations, then build the agents."""
        # Every agent starts unset; initialize_agents() populates them below.
        self.detection_agent = self.counter_speech_agent = None
        self.moderation_agent = self.sentiment_agent = None

        # Prompt configurations are read through load_prompts(), which falls
        # back to the built-in prompts when a file cannot be used.
        prompt_files = {
            "counter_speech_prompts": "counter_speech_prompts.json",
            "moderation_prompts": "moderation_prompts.json",
        }
        for attribute_name, prompt_file in prompt_files.items():
            setattr(self, attribute_name, self.load_prompts(prompt_file))

        self.initialize_agents()
    
    def load_prompts(self, filename):
        """Load prompts from JSON file with fallback"""
        try:
            if os.path.exists(filename):
                with open(filename, 'r', encoding='utf-8') as f:
                    return json.load(f)
            else:
                logger.warning(f"Prompt file {filename} not found, using built-in prompts")
                return self.get_default_prompts(filename)
        except Exception as e:
            logger.error(f"Error loading prompts from {filename}: {e}")
            return self.get_default_prompts(filename)
    
    def get_default_prompts(self, filename):
        """Default prompts as fallback"""
        if "counter_speech" in filename:
            return {
                "counter_speech_prompts": {
                    "high_risk": {
                        "system_prompt": "You are an expert educator specializing in counter-speech and conflict de-escalation.",
                        "user_prompt_template": "Generate a respectful, educational counter-speech response to address harmful content while promoting understanding. Original text (Risk: {risk_level}, Confidence: {confidence}%, Sentiment: {sentiment}): \"{original_text}\"\n\nCounter-speech response:",
                    },
                    "general_template": {
                        "fallback_responses": [
                            "Thank you for sharing your thoughts. Building strong communities works best when we focus on shared values and constructive dialogue. How might we work together on the concerns you've raised?",
                            "I appreciate your perspective. Sometimes our strongest feelings can be expressed in ways that bring people together. What specific positive changes would you like to see?",
                            "Your engagement with this topic is clear. When we channel that energy into inclusive dialogue, we often find solutions that work for everyone."
                        ]
                    }
                }
            }
        else:
            return {
                "moderation_prompts": {
                    "comprehensive_analysis": {
                        "system_prompt": "You are an expert content moderation specialist analyzing text for safety and compliance.",
                        "user_prompt_template": "Analyze this text for potential violations: \"{text}\"\n\nProvide: 1) Safety assessment 2) Violation categories 3) Severity level 4) Confidence score 5) Recommended action\n\nAnalysis:",
                    }
                }
            }
    
    def initialize_agents(self):
        """Build the detection, counter-speech, moderation and sentiment agents in that order."""
        logger.info("🤖 Initializing Prompt-Based Multi-Agent System...")

        agent_builders = (
            self.setup_detection_agent,
            self.setup_counter_speech_agent,
            self.setup_moderation_agent,
            self.setup_sentiment_agent,
        )
        for build_agent in agent_builders:
            build_agent()

        logger.info("✅ All agents initialized successfully!")
    
    def setup_detection_agent(self):
        """Initialize the hate speech detection agent.

        Loads the classifier stored under ``./model``. If that fails for any
        reason, the error is logged and the ``unitary/toxic-bert`` pipeline is
        loaded instead. An exception raised while loading the fallback is not
        caught here and propagates to the caller.
        """
        # Resolve the device once so the primary and the fallback pipeline are
        # placed on the same device (the fallback previously ignored the GPU).
        device = 0 if torch.cuda.is_available() else -1

        try:
            logger.info("🔍 Loading Detection Agent (Fine-tuned DistilBERT)...")
            model_path = "./model"

            tokenizer = AutoTokenizer.from_pretrained(model_path)
            model = AutoModelForSequenceClassification.from_pretrained(
                model_path,
                torch_dtype=torch.float32
            )

            self.detection_agent = pipeline(
                "text-classification",
                model=model,
                tokenizer=tokenizer,
                return_all_scores=True,  # every label's score is needed downstream
                device=device
            )
            logger.info("✅ Detection Agent loaded successfully")

        except Exception as e:
            # Best-effort: any failure with the local model switches to the
            # publicly hosted fallback classifier.
            logger.error(f"❌ Detection Agent failed: {e}")
            logger.info("🔄 Using fallback detection model...")
            self.detection_agent = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                return_all_scores=True,
                device=device
            )
    
    def setup_counter_speech_agent(self):
        """Build the FLAN-T5 counter-speech generator; leave the agent as None on failure."""
        try:
            logger.info("💬 Loading Counter-Speech Agent with Custom Prompts...")

            # FLAN-T5 is chosen for instruction following; sampling is enabled
            # for the generated replies.
            pipeline_options = {
                "model": "google/flan-t5-base",
                "max_length": 200,
                "do_sample": True,
                "temperature": 0.7,
                "top_p": 0.9,
                "device": 0 if torch.cuda.is_available() else -1,
            }
            self.counter_speech_agent = pipeline("text2text-generation", **pipeline_options)
            logger.info("✅ Counter-Speech Agent loaded (FLAN-T5 with custom prompts)")

        except Exception as load_error:
            logger.error(f"❌ Counter-Speech Agent failed: {load_error}")
            self.counter_speech_agent = None
    
    def setup_moderation_agent(self):
        """Build the FLAN-T5 moderation analyzer; leave the agent as None on failure."""
        try:
            logger.info("🛡️ Loading Moderation Agent with Custom Prompts...")

            # Same FLAN-T5 checkpoint as the counter-speech agent, but with
            # sampling disabled for the structured analysis.
            pipeline_options = {
                "model": "google/flan-t5-base",
                "max_length": 300,
                "do_sample": False,
                "device": 0 if torch.cuda.is_available() else -1,
            }
            self.moderation_agent = pipeline("text2text-generation", **pipeline_options)
            logger.info("✅ Moderation Agent loaded (FLAN-T5 with analysis prompts)")

        except Exception as load_error:
            logger.error(f"❌ Moderation Agent failed: {load_error}")
            self.moderation_agent = None
    
    def setup_sentiment_agent(self):
        """Build the Twitter-RoBERTa sentiment classifier; leave the agent as None on failure."""
        try:
            logger.info("📊 Loading Sentiment Agent...")

            # All label scores are requested so the caller can report the full
            # sentiment distribution, not only the top label.
            pipeline_options = {
                "model": "cardiffnlp/twitter-roberta-base-sentiment-latest",
                "return_all_scores": True,
                "device": 0 if torch.cuda.is_available() else -1,
            }
            self.sentiment_agent = pipeline("sentiment-analysis", **pipeline_options)
            logger.info("✅ Sentiment Agent loaded (Twitter-RoBERTa)")

        except Exception as load_error:
            logger.error(f"❌ Sentiment Agent failed: {load_error}")
            self.sentiment_agent = None
    
    def detect_hate_speech(self, text):
        """Detection Agent: Analyze text for hate speech"""
        if not text or not text.strip():
            return {
                "status": "❌ Please enter some text to analyze.",
                "prediction": "No input",
                "confidence": 0.0,
                "all_scores": {},
                "risk_level": "Unknown",
                "is_hate_speech": False
            }
        
        try:
            results = self.detection_agent(text.strip())
            
            if isinstance(results, list) and len(results) > 0:
                if isinstance(results[0], list):
                    results = results[0]
                
                all_scores = {}
                max_score = 0
                predicted_label = "UNKNOWN"
                
                for result in results:
                    label = result["label"]
                    score = result["score"]
                    all_scores[label] = {
                        "score": score,
                        "percentage": f"{score*100:.2f}%",
                        "confidence": f"{score:.4f}"
                    }
                    
                    if score > max_score:
                        max_score = score
                        predicted_label = label
                
                # Determine hate speech status
                hate_keywords = ["HATE", "TOXIC", "NEGATIVE", "HARMFUL", "1", "LABEL_1"]
                
                is_hate_speech = False
                risk_level = "Low"
                
                if any(keyword in predicted_label.upper() for keyword in hate_keywords):
                    if max_score > 0.8:
                        is_hate_speech = True
                        risk_level = "High"
                        status = f"🚨 High confidence hate speech detected! ({predicted_label}: {max_score:.2%})"
                    elif max_score > 0.6:
                        is_hate_speech = True
                        risk_level = "Medium"
                        status = f"⚠️ Potential hate speech detected ({predicted_label}: {max_score:.2%})"
                    else:
                        risk_level = "Low-Medium"
                        status = f"⚡ Low confidence detection ({predicted_label}: {max_score:.2%})"
                else:
                    risk_level = "Low"
                    status = f"✅ No hate speech detected ({predicted_label}: {max_score:.2%})"
                
                return {
                    "status": status,
                    "prediction": predicted_label,
                    "confidence": max_score,
                    "all_scores": all_scores,
                    "risk_level": risk_level,
                    "is_hate_speech": is_hate_speech
                }
                
        except Exception as e:
            logger.error(f"Detection error: {e}")
            return {
                "status": f"❌ Detection error: {str(e)}",
                "prediction": "Error",
                "confidence": 0.0,
                "all_scores": {},
                "risk_level": "Unknown",
                "is_hate_speech": False
            }
    
    def analyze_sentiment(self, text):
        """Sentiment Agent: Analyze emotional tone"""
        if not self.sentiment_agent or not text.strip():
            return {"sentiment": "neutral", "confidence": 0.0}
        
        try:
            results = self.sentiment_agent(text.strip())
            if isinstance(results, list) and len(results) > 0:
                if isinstance(results[0], list):
                    results = results[0]
                
                best_sentiment = max(results, key=lambda x: x['score'])
                return {
                    "sentiment": best_sentiment['label'].lower(),
                    "confidence": best_sentiment['score'],
                    "all_sentiments": {r['label']: r['score'] for r in results}
                }
        except Exception as e:
            logger.error(f"Sentiment analysis error: {e}")
            return {"sentiment": "neutral", "confidence": 0.0}
    
    def moderate_content_with_prompts(self, text, detection_result, sentiment_result):
        """Moderation Agent: Structured analysis using prompts"""
        if not self.moderation_agent or not text.strip():
            return {"analysis": "Unable to perform moderation analysis", "confidence": 0.0}
        
        try:
            # Get the appropriate moderation prompt
            moderation_config = self.moderation_prompts.get("moderation_prompts", {})
            analysis_config = moderation_config.get("comprehensive_analysis", {})
            
            # Construct the analysis prompt
            system_prompt = analysis_config.get("system_prompt", "Analyze this text for safety concerns.")
            user_prompt_template = analysis_config.get("user_prompt_template", "Analyze: {text}")
            
            # Fill in the template
            full_prompt = f"{system_prompt}\n\n{user_prompt_template.format(text=text)}"
            
            # Generate analysis
            result = self.moderation_agent(full_prompt, max_length=250, do_sample=False)
            
            if result and len(result) > 0:
                analysis_text = result[0]['generated_text']
                
                # Parse the analysis for key information
                confidence = self.extract_confidence_from_analysis(analysis_text)
                safety_level = self.extract_safety_level_from_analysis(analysis_text)
                
                return {
                    "analysis": analysis_text,
                    "confidence": confidence,
                    "safety_level": safety_level,
                    "prompt_used": "comprehensive_analysis"
                }
            
        except Exception as e:
            logger.error(f"Moderation analysis error: {e}")
            
        # Fallback analysis
        return {
            "analysis": f"Basic assessment: Risk level {detection_result.get('risk_level', 'unknown')}, requires review if confidence > 70%",
            "confidence": detection_result.get('confidence', 0.0),
            "safety_level": "review_needed" if detection_result.get('confidence', 0) > 0.7 else "acceptable"
        }
    
    def generate_counter_speech_with_prompts(self, text, detection_result, sentiment_result):
        """Counter-Speech Agent: Generate response using custom prompts"""
        if not detection_result.get("is_hate_speech", False):
            return "✨ This text promotes positive communication. Great job maintaining respectful dialogue!"
        
        risk_level = detection_result.get("risk_level", "Low").lower()
        confidence = detection_result.get("confidence", 0.0) * 100
        sentiment = sentiment_result.get("sentiment", "neutral")
        
        # Get appropriate prompts based on risk level
        counter_speech_config = self.counter_speech_prompts.get("counter_speech_prompts", {})
        
        # Select prompt based on risk level
        if risk_level == "high":
            prompt_config = counter_speech_config.get("high_risk", {})
        elif risk_level == "medium":
            prompt_config = counter_speech_config.get("medium_risk", {})
        else:
            prompt_config = counter_speech_config.get("low_risk", {})
        
        # If no specific config, use general template
        if not prompt_config:
            prompt_config = counter_speech_config.get("general_template", {})
        
        if self.counter_speech_agent and prompt_config:
            try:
                # Construct the prompt
                system_prompt = prompt_config.get("system_prompt", "Generate a respectful counter-speech response.")
                user_prompt_template = prompt_config.get("user_prompt_template", 
                    "Generate a counter-speech response for: {original_text}")
                
                # Fill in the template
                full_prompt = f"{system_prompt}\n\n{user_prompt_template.format(original_text=text, risk_level=risk_level, confidence=confidence, sentiment=sentiment)}"
                
                # Generate counter-speech
                result = self.counter_speech_agent(full_prompt, max_length=150, do_sample=True, temperature=0.7)
                
                if result and len(result) > 0:
                    generated_text = result[0]['generated_text']
                    
                    # Clean up the response
                    if "Counter-speech response:" in generated_text:
                        generated_text = generated_text.split("Counter-speech response:")[-1].strip()
                    elif "response:" in generated_text.lower():
                        parts = generated_text.lower().split("response:")
                        if len(parts) > 1:
                            generated_text = parts[-1].strip()
                    
                    return f"🤖 **AI-Generated Counter-Speech** ({risk_level.title()} Risk): {generated_text}"
            
            except Exception as e:
                logger.error(f"Counter-speech generation error: {e}")
        
        # Fallback to template responses
        fallback_responses = counter_speech_config.get("general_template", {}).get("fallback_responses", [
            "Thank you for sharing your thoughts. Building strong communities works best when we focus on shared values and constructive dialogue."
        ])
        
        import random
        return f"📝 **Template Response** ({risk_level.title()} Risk): {random.choice(fallback_responses)}"
    
    def extract_confidence_from_analysis(self, analysis_text):
        """Extract confidence score from moderation analysis"""
        import re
        # Look for confidence patterns like "85%" or "confidence: 0.85"
        patterns = [
            r'(\d+)%',
            r'confidence[:\s]+(\d*\.?\d+)',
            r'(\d*\.?\d+)\s*confidence'
        ]
        
        for pattern in patterns:
            match = re.search(pattern, analysis_text.lower())
            if match:
                value = float(match.group(1))
                return value / 100 if value > 1 else value
        
        return 0.5  # Default moderate confidence
    
    def extract_safety_level_from_analysis(self, analysis_text):
        """Extract safety assessment from moderation analysis"""
        analysis_lower = analysis_text.lower()
        
        if any(word in analysis_lower for word in ['harmful', 'high risk', 'remove', 'violation']):
            return "harmful"
        elif any(word in analysis_lower for word in ['concerning', 'medium risk', 'review', 'warning']):
            return "concerning"
        elif any(word in analysis_lower for word in ['safe', 'low risk', 'acceptable', 'approve']):
            return "safe"
        else:
            return "review_needed"
    
    def comprehensive_analysis(self, text):
        """Run all agents with prompt-based analysis"""
        start_time = datetime.now()
        
        # Run core agents
        detection_result = self.detect_hate_speech(text)
        sentiment_result = self.analyze_sentiment(text)
        
        # Run prompt-based agents
        moderation_result = self.moderate_content_with_prompts(text, detection_result, sentiment_result)
        counter_speech = self.generate_counter_speech_with_prompts(text, detection_result, sentiment_result)
        
        processing_time = (datetime.now() - start_time).total_seconds()
        
        return {
            "detection": detection_result,
            "sentiment": sentiment_result,
            "moderation": moderation_result,
            "counter_speech": counter_speech,
            "processing_time": processing_time,
            "timestamp": datetime.now().isoformat()
        }

# Initialize the system at import time: constructing PromptBasedMultiAgentSystem
# loads the prompt files and sets up all four agents, so importing this module
# triggers model loading. This single shared instance is used by the handler
# functions defined below.
logger.info("🚀 Starting Prompt-Based Multi-Agent System...")
agent_system = PromptBasedMultiAgentSystem()

def analyze_text_with_prompts(text):
    """Run the full multi-agent analysis on *text* and shape the results for the UI.

    Args:
        text: Raw user input from the text box.

    Returns:
        tuple: ``(detection_status, detection_scores, counter_speech,
        agent_summary, all_agent_data)`` where ``detection_status``,
        ``counter_speech`` and ``agent_summary`` are strings and the other two
        are JSON-serializable dicts. Blank input yields placeholder values
        without running any agent.
    """
    if not text or not text.strip():
        return (
            "❌ Please enter some text to analyze.",
            {},
            "No analysis performed.",
            "No input provided",
            {}
        )

    # Run comprehensive analysis with prompts
    results = agent_system.comprehensive_analysis(text)

    detection = results["detection"]
    sentiment = results["sentiment"]
    moderation = results["moderation"]

    # Extract results for display
    detection_status = detection["status"]
    detection_scores = detection["all_scores"]
    counter_speech = results["counter_speech"]

    # Generated replies carry an "AI-Generated" marker in their prefix.
    is_ai_generated = "AI-Generated" in counter_speech
    safety_level = moderation.get('safety_level', 'unknown')

    # Append the ellipsis only when the analysis was actually cut off; it used
    # to be appended unconditionally, even to short texts.
    analysis_text = moderation.get('analysis', 'No detailed analysis available')
    analysis_preview = analysis_text if len(analysis_text) <= 200 else f"{analysis_text[:200]}..."

    # Create detailed agent summary
    agent_summary = f"""
🔍 **Detection Agent**: {detection['risk_level']} risk ({detection['confidence']:.2%} confidence)
📊 **Sentiment Agent**: {sentiment['sentiment'].title()} ({sentiment['confidence']:.2%} confidence)
🛡️ **Moderation Agent**: {safety_level.title()} safety level ({moderation.get('confidence', 0):.2%} confidence)
💬 **Counter-Speech Agent**: {"Custom prompt-based" if is_ai_generated else "Template-based"} response
⏱️ **Processing Time**: {results['processing_time']:.3f} seconds

📋 **Moderation Analysis**: {analysis_preview}
"""

    # Compile comprehensive agent data
    all_agent_data = {
        "Detection_Analysis": {
            "scores": detection_scores,
            "risk_level": detection['risk_level'],
            "is_hate_speech": detection['is_hate_speech']
        },
        "Sentiment_Analysis": {
            "primary_sentiment": sentiment['sentiment'],
            "all_sentiments": sentiment.get("all_sentiments", {})
        },
        "Moderation_Analysis": {
            "safety_assessment": safety_level,
            "detailed_analysis": moderation.get('analysis', ''),
            "confidence": moderation.get('confidence', 0),
            "prompt_used": moderation.get('prompt_used', 'fallback')
        },
        "Counter_Speech": {
            "response": counter_speech,
            "generation_method": "AI-Generated" if is_ai_generated else "Template-based"
        },
        "System_Info": {
            "timestamp": results["timestamp"],
            "processing_time_seconds": results["processing_time"],
            "prompt_files_loaded": {
                "counter_speech": bool(agent_system.counter_speech_prompts),
                "moderation": bool(agent_system.moderation_prompts)
            }
        }
    }

    return detection_status, detection_scores, counter_speech, agent_summary, all_agent_data

def reload_prompts():
    """Reload both prompt files from disk and report the outcome for the UI.

    ``load_prompts`` handles its own errors and substitutes the built-in
    prompts, so this function checks for the files itself in order to report
    when defaults were used instead of always claiming success.

    Returns:
        str: A success message when both files are present, a warning naming
        the missing files otherwise, or an error message if reloading raised.
    """
    counter_speech_file = "counter_speech_prompts.json"
    moderation_file = "moderation_prompts.json"

    try:
        agent_system.counter_speech_prompts = agent_system.load_prompts(counter_speech_file)
        agent_system.moderation_prompts = agent_system.load_prompts(moderation_file)
        missing_files = [
            name for name in (counter_speech_file, moderation_file)
            if not os.path.exists(name)
        ]
    except Exception as e:
        return f"❌ Error reloading prompts: {e}"

    if missing_files:
        return f"⚠️ Prompts reloaded; built-in prompts used for missing files: {', '.join(missing_files)}"
    return "✅ Prompts reloaded successfully!"

def get_prompt_info():
    """Summarize the currently loaded prompt configuration.

    Returns:
        dict: The number of prompt categories in each configuration, plus a
        per-file status. A file is reported as loaded only when it exists on
        disk and the active configuration has at least one category. The
        category count alone cannot tell, because the built-in fallback
        prompts are never empty.
    """
    counter_prompts = len(agent_system.counter_speech_prompts.get("counter_speech_prompts", {}))
    moderation_prompts = len(agent_system.moderation_prompts.get("moderation_prompts", {}))

    # NOTE(review): a file that exists but failed to parse is still reported as
    # loaded here, since load_prompts() does not record why it fell back.
    counter_file_loaded = os.path.exists("counter_speech_prompts.json") and counter_prompts > 0
    moderation_file_loaded = os.path.exists("moderation_prompts.json") and moderation_prompts > 0

    return {
        "counter_speech_prompt_categories": counter_prompts,
        "moderation_prompt_categories": moderation_prompts,
        "prompt_files_status": {
            "counter_speech_prompts.json": "✅ Loaded" if counter_file_loaded else "❌ Not found",
            "moderation_prompts.json": "✅ Loaded" if moderation_file_loaded else "❌ Not found"
        }
    }

# Create the Gradio interface: three tabs (analysis, prompt management,
# architecture notes) followed by the button event wiring.
with gr.Blocks(
    title="Prompt-Based Multi-Agent Hate Speech Detection System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1400px !important;
    }
    .prompt-info {
        background: linear-gradient(90deg, #f0f9ff 0%, #e0f2fe 100%);
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #0284c7;
    }
    .agent-summary {
        background: linear-gradient(90deg, #fefce8 0%, #fef3c7 100%);
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #f59e0b;
    }
    """
) as demo:

    gr.Markdown("""
    # 🤖 Prompt-Based Multi-Agent Hate Speech Detection System
    
    **Advanced AI Agent Collaboration with Custom Prompts**
    
    🔍 **Detection Agent** - Your fine-tuned DistilBERT model  
    💬 **Counter-Speech Agent** - FLAN-T5 with custom prompt engineering  
    🛡️ **Moderation Agent** - Structured analysis using specialized prompts  
    📊 **Sentiment Agent** - Twitter-RoBERTa for emotional context  
    
    *Each agent uses carefully crafted prompts from external JSON files for optimal performance.*
    """)

    # --- Tab 1: input, action buttons, examples and result panels ---
    with gr.Tab("🤖 Prompt-Based Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                text_input = gr.Textbox(
                    label="Enter text for comprehensive prompt-based analysis",
                    placeholder="Enter text here to see how prompt-engineered AI agents collaborate...",
                    lines=5,
                    max_lines=15
                )

                with gr.Row():
                    analyze_btn = gr.Button("🚀 Run Prompt-Based Analysis", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
                    reload_btn = gr.Button("🔄 Reload Prompts", variant="secondary")

                gr.Examples(
                    examples=[
                        ["This is a wonderful day to collaborate and learn from each other!"],
                        ["I appreciate everyone's different perspectives and backgrounds."],
                        ["Let's work together to build a more inclusive community."],
                        ["Thank you for sharing your experience. I'd love to understand your viewpoint better."],
                        ["The diversity in our group makes our discussions much richer and more meaningful."],
                        ["I respectfully disagree, but I value your right to express your opinion."]
                    ],
                    inputs=text_input,
                    label="📝 Try these examples with prompt-based agents:"
                )

        with gr.Row():
            with gr.Column():
                detection_output = gr.Textbox(
                    label="🎯 Primary Detection Result",
                    interactive=False,
                    lines=3
                )

                agent_summary = gr.Textbox(
                    label="🤖 Prompt-Based Agent Summary",
                    interactive=False,
                    lines=8,
                    elem_classes=["agent-summary"]
                )

            with gr.Column():
                counter_speech_output = gr.Textbox(
                    label="💬 Prompt-Generated Counter-Speech",
                    interactive=False,
                    lines=6
                )

                reload_status = gr.Textbox(
                    label="🔄 Prompt Reload Status",
                    interactive=False,
                    lines=2
                )

        with gr.Row():
            all_agents_output = gr.JSON(
                label="📊 Complete Prompt-Based Multi-Agent Analysis",
                visible=True
            )

    # --- Tab 2: static prompt documentation plus the loaded-prompt summary ---
    with gr.Tab("📝 Prompt Management"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
                ## 📝 Counter-Speech Prompts
                
                The system uses specialized prompts for different risk levels:
                
                ### 🚨 High Risk Prompts
                - **Purpose**: Address clear hate speech with educational responses
                - **Tone**: Firm but respectful, educational focus
                - **Length**: 50-100 words
                - **Goal**: De-escalation and education
                
                ### ⚠️ Medium Risk Prompts
                - **Purpose**: Handle potentially problematic content
                - **Tone**: Gentle guidance, supportive
                - **Length**: 40-80 words
                - **Goal**: Reflection and improvement
                
                ### ⚡ Low Risk Prompts
                - **Purpose**: Encourage even better communication
                - **Tone**: Positive reinforcement
                - **Length**: 30-60 words
                - **Goal**: Enhancement and encouragement
                """)

            with gr.Column():
                gr.Markdown("""
                ## 🛡️ Moderation Prompts
                
                Structured analysis prompts for comprehensive assessment:
                
                ### 🔍 Comprehensive Analysis
                - **Safety Assessment**: SAFE/CONCERNING/HARMFUL
                - **Violation Categories**: Specific policy areas
                - **Severity Levels**: LOW/MEDIUM/HIGH
                - **Confidence Scoring**: 0-100% certainty
                - **Contextual Factors**: Cultural and situational
                
                ### 📊 Specialized Analysis Types
                - **Hate Speech Focus**: Protected group targeting
                - **Toxicity Assessment**: Discourse quality impact
                - **Context Analysis**: Cultural and situational factors
                - **Action Recommendations**: Specific moderation steps
                """)

        with gr.Row():
            # The summary is computed once, when the interface is built.
            prompt_info_output = gr.JSON(
                label="📋 Current Prompt Configuration",
                value=get_prompt_info()
            )

        gr.Markdown("""
        ## 📁 Prompt File Structure
        
        To customize the system behavior, create these JSON files:
        
        ### `counter_speech_prompts.json`
        ```json
        {
          "counter_speech_prompts": {
            "high_risk": {
              "system_prompt": "You are an expert educator...",
              "user_prompt_template": "Generate response for: {original_text}..."
            }
          }
        }
        ```
        
        ### `moderation_prompts.json`
        ```json
        {
          "moderation_prompts": {
            "comprehensive_analysis": {
              "system_prompt": "You are a content moderation expert...",
              "user_prompt_template": "Analyze: {text}..."
            }
          }
        }
        ```
        
        **Benefits of External Prompts:**
        - 🎯 **Fine-tuned control** over agent behavior
        - 🔄 **Easy iteration** without code changes
        - 📊 **A/B testing** of different prompt strategies
        - 🎨 **Domain-specific customization** for different platforms
        - 📈 **Performance optimization** through prompt engineering
        """)

    # --- Tab 3: static architecture notes ---
    with gr.Tab("🔧 System Architecture"):
        gr.Markdown("""
        ## 🏗️ Prompt-Based Agent Architecture
        
        ### 🔄 Agent Collaboration Flow
        ```
        Input Text
        ├── Detection Agent → Risk Classification (DistilBERT)
        ├── Sentiment Agent → Emotional Context (RoBERTa)
        ├── Moderation Agent → Structured Analysis (FLAN-T5 + Prompts)
        └── Counter-Speech Agent → Educational Response (FLAN-T5 + Prompts)
               ↑
        Uses custom prompts and outputs from all other agents
        ```
        
        ### 📝 Prompt Engineering Advantages
        
        #### 🎯 **Precision Control**
        - **Task-specific instructions** for each scenario
        - **Tone and style guidelines** for appropriate responses
        - **Length and format specifications** for consistency
        - **Context integration** from multiple agent outputs
        
        #### 🔄 **Iterative Improvement**
        - **Hot-swappable prompts** without system restart
        - **A/B testing capabilities** for prompt effectiveness
        - **Performance metrics** tracking for optimization
        - **Domain adaptation** for different use cases
        
        #### 🛡️ **Quality Assurance**
        - **Bias mitigation** through careful prompt design
        - **Safety guardrails** built into prompt structure
        - **Consistency enforcement** across all responses
        - **Cultural sensitivity** considerations
        
        ### 🚀 Production Benefits
        
        - **🎨 Customizable**: Adapt to different platforms and communities
        - **📈 Scalable**: Easy to add new prompt categories
        - **🔧 Maintainable**: Update behavior without code deployment
        - **📊 Measurable**: Track prompt performance and effectiveness
        - **🌍 Localizable**: Different prompts for different regions/cultures
        
        ### ⚠️ Deployment Considerations
        
        #### 🔒 Security
        - **Prompt injection protection** for user inputs
        - **Content filtering** on generated responses
        - **Rate limiting** to prevent abuse
        - **Audit logging** for compliance
        
        #### 📊 Monitoring
        - **Response quality metrics** tracking
        - **User feedback integration** for continuous improvement
        - **Error rate monitoring** across different prompt types
        - **Performance benchmarking** against baseline models
        
        #### 👥 Human Oversight
        - **Expert review processes** for prompt updates
        - **Community feedback loops** for prompt effectiveness
        - **Escalation pathways** for edge cases
        - **Regular bias audits** and prompt refinement
        """)

    # Event handlers
    def _analyze_for_ui(text):
        """Adapt analyze_text_with_prompts() to the four distinct result components.

        The analysis returns five values, but its second value (the raw
        detection scores) has no component of its own. It used to be routed to
        the JSON panel, which was listed twice in ``outputs``, so one of the
        two values sent to that panel was discarded. The scores are already
        part of the final payload under "Detection_Analysis" -> "scores", so
        they are simply dropped here.
        """
        status, _scores, counter_speech, summary, agent_data = analyze_text_with_prompts(text)
        return status, counter_speech, summary, agent_data

    analyze_btn.click(
        fn=_analyze_for_ui,
        inputs=text_input,
        outputs=[detection_output, counter_speech_output, agent_summary, all_agents_output]
    )

    # Reset the input and every analysis panel; the reload status is left as is.
    clear_btn.click(
        fn=lambda: ("", "", "", "", {}),
        outputs=[text_input, detection_output, counter_speech_output, agent_summary, all_agents_output]
    )

    reload_btn.click(
        fn=reload_prompts,
        outputs=reload_status
    )

# Launch configuration: start the server only when this file is run as a
# script, listening on all interfaces at port 7860.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "share": False,
    }
    demo.launch(**launch_options)