File size: 34,123 Bytes
2662656
 
 
 
 
 
 
8cdb327
2662656
 
 
 
 
8cdb327
2662656
8cdb327
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
8cdb327
 
2662656
8cdb327
2662656
 
8cdb327
2662656
8cdb327
 
2662656
 
 
8cdb327
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
8cdb327
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
8cdb327
2662656
 
 
 
 
 
8cdb327
 
2662656
 
 
8cdb327
2662656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8cdb327
2662656
8cdb327
2662656
 
 
8cdb327
 
2662656
 
8cdb327
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
2662656
8cdb327
 
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
8cdb327
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
2662656
8cdb327
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
 
8cdb327
2662656
 
 
 
 
8cdb327
 
2662656
 
8cdb327
 
 
 
 
 
 
2662656
 
 
 
 
8cdb327
 
 
2662656
8cdb327
 
 
 
2662656
8cdb327
2662656
 
8cdb327
2662656
 
 
8cdb327
 
2662656
 
 
 
 
8cdb327
2662656
8cdb327
2662656
 
 
8cdb327
 
 
 
 
 
2662656
 
8cdb327
2662656
 
 
8cdb327
 
 
2662656
 
 
 
8cdb327
 
2662656
8cdb327
 
2662656
 
8cdb327
 
 
2662656
8cdb327
 
 
 
 
 
 
2662656
 
 
8cdb327
 
2662656
 
 
8cdb327
2662656
 
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
 
8cdb327
 
 
2662656
8cdb327
 
 
 
 
 
2662656
8cdb327
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
 
 
 
 
 
 
 
 
 
 
2662656
8cdb327
2662656
8cdb327
 
 
 
 
 
 
2662656
 
8cdb327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662656
 
 
 
8cdb327
2662656
8cdb327
2662656
 
 
 
8cdb327
 
 
 
 
 
2662656
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import numpy as np
import json
from datetime import datetime
import logging
import os

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class PromptBasedMultiAgentSystem:
    def __init__(self):
        self.detection_agent = None
        self.counter_speech_agent = None
        self.moderation_agent = None
        self.sentiment_agent = None
        
        # Load prompt configurations
        self.counter_speech_prompts = self.load_prompts("counter_speech_prompts.json")
        self.moderation_prompts = self.load_prompts("moderation_prompts.json")
        
        self.initialize_agents()
    
    def load_prompts(self, filename):
        """Load prompts from JSON file with fallback"""
        try:
            if os.path.exists(filename):
                with open(filename, 'r', encoding='utf-8') as f:
                    return json.load(f)
            else:
                logger.warning(f"Prompt file {filename} not found, using built-in prompts")
                return self.get_default_prompts(filename)
        except Exception as e:
            logger.error(f"Error loading prompts from {filename}: {e}")
            return self.get_default_prompts(filename)
    
    def get_default_prompts(self, filename):
        """Default prompts as fallback"""
        if "counter_speech" in filename:
            return {
                "counter_speech_prompts": {
                    "high_risk": {
                        "system_prompt": "You are an expert educator specializing in counter-speech and conflict de-escalation.",
                        "user_prompt_template": "Generate a respectful, educational counter-speech response to address harmful content while promoting understanding. Original text (Risk: {risk_level}, Confidence: {confidence}%, Sentiment: {sentiment}): \"{original_text}\"\n\nCounter-speech response:",
                    },
                    "general_template": {
                        "fallback_responses": [
                            "Thank you for sharing your thoughts. Building strong communities works best when we focus on shared values and constructive dialogue. How might we work together on the concerns you've raised?",
                            "I appreciate your perspective. Sometimes our strongest feelings can be expressed in ways that bring people together. What specific positive changes would you like to see?",
                            "Your engagement with this topic is clear. When we channel that energy into inclusive dialogue, we often find solutions that work for everyone."
                        ]
                    }
                }
            }
        else:
            return {
                "moderation_prompts": {
                    "comprehensive_analysis": {
                        "system_prompt": "You are an expert content moderation specialist analyzing text for safety and compliance.",
                        "user_prompt_template": "Analyze this text for potential violations: \"{text}\"\n\nProvide: 1) Safety assessment 2) Violation categories 3) Severity level 4) Confidence score 5) Recommended action\n\nAnalysis:",
                    }
                }
            }
    
    def initialize_agents(self):
        """Initialize all AI agents"""
        logger.info("🤖 Initializing Prompt-Based Multi-Agent System...")
        
        self.setup_detection_agent()
        self.setup_counter_speech_agent()
        self.setup_moderation_agent()
        self.setup_sentiment_agent()
        
        logger.info("✅ All agents initialized successfully!")
    
    def setup_detection_agent(self):
        """Initialize the hate speech detection agent.

        First tries to build a text-classification pipeline from the model
        stored under ``./model``. If anything in that attempt fails, the
        error is logged and a pipeline for ``unitary/toxic-bert`` is created
        instead. Both pipelines use the same device selection (GPU 0 when
        CUDA is available, otherwise CPU); previously the fallback omitted
        the ``device`` argument and so ignored an available GPU.

        An exception raised while building the fallback pipeline is not
        caught here and propagates to the caller.
        """
        # Shared by the primary and the fallback pipeline.
        device = 0 if torch.cuda.is_available() else -1

        try:
            logger.info("🔍 Loading Detection Agent (Fine-tuned DistilBERT)...")
            model_path = "./model"

            tokenizer = AutoTokenizer.from_pretrained(model_path)
            model = AutoModelForSequenceClassification.from_pretrained(
                model_path,
                torch_dtype=torch.float32
            )

            # NOTE(review): `return_all_scores` may be deprecated in newer
            # transformers releases in favour of `top_k=None` — confirm
            # against the pinned version before changing it.
            self.detection_agent = pipeline(
                "text-classification",
                model=model,
                tokenizer=tokenizer,
                return_all_scores=True,
                device=device
            )
            logger.info("✅ Detection Agent loaded successfully")

        except Exception as e:
            logger.error(f"❌ Detection Agent failed: {e}")
            logger.info("🔄 Using fallback detection model...")
            self.detection_agent = pipeline(
                "text-classification",
                model="unitary/toxic-bert",
                return_all_scores=True,
                device=device
            )
    
    def setup_counter_speech_agent(self):
        """Create the counter-speech generation pipeline.

        On any failure the error is logged and ``counter_speech_agent`` is
        set to ``None``.
        """
        try:
            logger.info("💬 Loading Counter-Speech Agent with Custom Prompts...")

            # Sampling-based text2text generation on google/flan-t5-base.
            generation_settings = {
                "model": "google/flan-t5-base",
                "max_length": 200,
                "do_sample": True,
                "temperature": 0.7,
                "top_p": 0.9,
                "device": 0 if torch.cuda.is_available() else -1,
            }
            self.counter_speech_agent = pipeline("text2text-generation", **generation_settings)
            logger.info("✅ Counter-Speech Agent loaded (FLAN-T5 with custom prompts)")

        except Exception as e:
            logger.error(f"❌ Counter-Speech Agent failed: {e}")
            self.counter_speech_agent = None
    
    def setup_moderation_agent(self):
        """Create the moderation analysis pipeline.

        On any failure the error is logged and ``moderation_agent`` is set
        to ``None``.
        """
        try:
            logger.info("🛡️ Loading Moderation Agent with Custom Prompts...")

            # Non-sampling text2text generation on google/flan-t5-base.
            analysis_settings = {
                "model": "google/flan-t5-base",
                "max_length": 300,
                "do_sample": False,
                "device": 0 if torch.cuda.is_available() else -1,
            }
            self.moderation_agent = pipeline("text2text-generation", **analysis_settings)
            logger.info("✅ Moderation Agent loaded (FLAN-T5 with analysis prompts)")

        except Exception as e:
            logger.error(f"❌ Moderation Agent failed: {e}")
            self.moderation_agent = None
    
    def setup_sentiment_agent(self):
        """Create the sentiment analysis pipeline.

        On any failure the error is logged and ``sentiment_agent`` is set
        to ``None``.
        """
        try:
            logger.info("📊 Loading Sentiment Agent...")

            # Scores for every label are requested, not just the top one.
            sentiment_settings = {
                "model": "cardiffnlp/twitter-roberta-base-sentiment-latest",
                "return_all_scores": True,
                "device": 0 if torch.cuda.is_available() else -1,
            }
            self.sentiment_agent = pipeline("sentiment-analysis", **sentiment_settings)
            logger.info("✅ Sentiment Agent loaded (Twitter-RoBERTa)")

        except Exception as e:
            logger.error(f"❌ Sentiment Agent failed: {e}")
            self.sentiment_agent = None
    
    def detect_hate_speech(self, text):
        """Detection Agent: Analyze text for hate speech"""
        if not text or not text.strip():
            return {
                "status": "❌ Please enter some text to analyze.",
                "prediction": "No input",
                "confidence": 0.0,
                "all_scores": {},
                "risk_level": "Unknown",
                "is_hate_speech": False
            }
        
        try:
            results = self.detection_agent(text.strip())
            
            if isinstance(results, list) and len(results) > 0:
                if isinstance(results[0], list):
                    results = results[0]
                
                all_scores = {}
                max_score = 0
                predicted_label = "UNKNOWN"
                
                for result in results:
                    label = result["label"]
                    score = result["score"]
                    all_scores[label] = {
                        "score": score,
                        "percentage": f"{score*100:.2f}%",
                        "confidence": f"{score:.4f}"
                    }
                    
                    if score > max_score:
                        max_score = score
                        predicted_label = label
                
                # Determine hate speech status
                hate_keywords = ["HATE", "TOXIC", "NEGATIVE", "HARMFUL", "1", "LABEL_1"]
                
                is_hate_speech = False
                risk_level = "Low"
                
                if any(keyword in predicted_label.upper() for keyword in hate_keywords):
                    if max_score > 0.8:
                        is_hate_speech = True
                        risk_level = "High"
                        status = f"🚨 High confidence hate speech detected! ({predicted_label}: {max_score:.2%})"
                    elif max_score > 0.6:
                        is_hate_speech = True
                        risk_level = "Medium"
                        status = f"⚠️ Potential hate speech detected ({predicted_label}: {max_score:.2%})"
                    else:
                        risk_level = "Low-Medium"
                        status = f"⚡ Low confidence detection ({predicted_label}: {max_score:.2%})"
                else:
                    risk_level = "Low"
                    status = f"✅ No hate speech detected ({predicted_label}: {max_score:.2%})"
                
                return {
                    "status": status,
                    "prediction": predicted_label,
                    "confidence": max_score,
                    "all_scores": all_scores,
                    "risk_level": risk_level,
                    "is_hate_speech": is_hate_speech
                }
                
        except Exception as e:
            logger.error(f"Detection error: {e}")
            return {
                "status": f"❌ Detection error: {str(e)}",
                "prediction": "Error",
                "confidence": 0.0,
                "all_scores": {},
                "risk_level": "Unknown",
                "is_hate_speech": False
            }
    
    def analyze_sentiment(self, text):
        """Sentiment Agent: Analyze emotional tone"""
        if not self.sentiment_agent or not text.strip():
            return {"sentiment": "neutral", "confidence": 0.0}
        
        try:
            results = self.sentiment_agent(text.strip())
            if isinstance(results, list) and len(results) > 0:
                if isinstance(results[0], list):
                    results = results[0]
                
                best_sentiment = max(results, key=lambda x: x['score'])
                return {
                    "sentiment": best_sentiment['label'].lower(),
                    "confidence": best_sentiment['score'],
                    "all_sentiments": {r['label']: r['score'] for r in results}
                }
        except Exception as e:
            logger.error(f"Sentiment analysis error: {e}")
            return {"sentiment": "neutral", "confidence": 0.0}
    
    def moderate_content_with_prompts(self, text, detection_result, sentiment_result):
        """Moderation Agent: Structured analysis using prompts"""
        if not self.moderation_agent or not text.strip():
            return {"analysis": "Unable to perform moderation analysis", "confidence": 0.0}
        
        try:
            # Get the appropriate moderation prompt
            moderation_config = self.moderation_prompts.get("moderation_prompts", {})
            analysis_config = moderation_config.get("comprehensive_analysis", {})
            
            # Construct the analysis prompt
            system_prompt = analysis_config.get("system_prompt", "Analyze this text for safety concerns.")
            user_prompt_template = analysis_config.get("user_prompt_template", "Analyze: {text}")
            
            # Fill in the template
            full_prompt = f"{system_prompt}\n\n{user_prompt_template.format(text=text)}"
            
            # Generate analysis
            result = self.moderation_agent(full_prompt, max_length=250, do_sample=False)
            
            if result and len(result) > 0:
                analysis_text = result[0]['generated_text']
                
                # Parse the analysis for key information
                confidence = self.extract_confidence_from_analysis(analysis_text)
                safety_level = self.extract_safety_level_from_analysis(analysis_text)
                
                return {
                    "analysis": analysis_text,
                    "confidence": confidence,
                    "safety_level": safety_level,
                    "prompt_used": "comprehensive_analysis"
                }
            
        except Exception as e:
            logger.error(f"Moderation analysis error: {e}")
            
        # Fallback analysis
        return {
            "analysis": f"Basic assessment: Risk level {detection_result.get('risk_level', 'unknown')}, requires review if confidence > 70%",
            "confidence": detection_result.get('confidence', 0.0),
            "safety_level": "review_needed" if detection_result.get('confidence', 0) > 0.7 else "acceptable"
        }
    
    def generate_counter_speech_with_prompts(self, text, detection_result, sentiment_result):
        """Counter-Speech Agent: Generate response using custom prompts"""
        if not detection_result.get("is_hate_speech", False):
            return "✨ This text promotes positive communication. Great job maintaining respectful dialogue!"
        
        risk_level = detection_result.get("risk_level", "Low").lower()
        confidence = detection_result.get("confidence", 0.0) * 100
        sentiment = sentiment_result.get("sentiment", "neutral")
        
        # Get appropriate prompts based on risk level
        counter_speech_config = self.counter_speech_prompts.get("counter_speech_prompts", {})
        
        # Select prompt based on risk level
        if risk_level == "high":
            prompt_config = counter_speech_config.get("high_risk", {})
        elif risk_level == "medium":
            prompt_config = counter_speech_config.get("medium_risk", {})
        else:
            prompt_config = counter_speech_config.get("low_risk", {})
        
        # If no specific config, use general template
        if not prompt_config:
            prompt_config = counter_speech_config.get("general_template", {})
        
        if self.counter_speech_agent and prompt_config:
            try:
                # Construct the prompt
                system_prompt = prompt_config.get("system_prompt", "Generate a respectful counter-speech response.")
                user_prompt_template = prompt_config.get("user_prompt_template", 
                    "Generate a counter-speech response for: {original_text}")
                
                # Fill in the template
                full_prompt = f"{system_prompt}\n\n{user_prompt_template.format(original_text=text, risk_level=risk_level, confidence=confidence, sentiment=sentiment)}"
                
                # Generate counter-speech
                result = self.counter_speech_agent(full_prompt, max_length=150, do_sample=True, temperature=0.7)
                
                if result and len(result) > 0:
                    generated_text = result[0]['generated_text']
                    
                    # Clean up the response
                    if "Counter-speech response:" in generated_text:
                        generated_text = generated_text.split("Counter-speech response:")[-1].strip()
                    elif "response:" in generated_text.lower():
                        parts = generated_text.lower().split("response:")
                        if len(parts) > 1:
                            generated_text = parts[-1].strip()
                    
                    return f"🤖 **AI-Generated Counter-Speech** ({risk_level.title()} Risk): {generated_text}"
            
            except Exception as e:
                logger.error(f"Counter-speech generation error: {e}")
        
        # Fallback to template responses
        fallback_responses = counter_speech_config.get("general_template", {}).get("fallback_responses", [
            "Thank you for sharing your thoughts. Building strong communities works best when we focus on shared values and constructive dialogue."
        ])
        
        import random
        return f"📝 **Template Response** ({risk_level.title()} Risk): {random.choice(fallback_responses)}"
    
    def extract_confidence_from_analysis(self, analysis_text):
        """Extract confidence score from moderation analysis"""
        import re
        # Look for confidence patterns like "85%" or "confidence: 0.85"
        patterns = [
            r'(\d+)%',
            r'confidence[:\s]+(\d*\.?\d+)',
            r'(\d*\.?\d+)\s*confidence'
        ]
        
        for pattern in patterns:
            match = re.search(pattern, analysis_text.lower())
            if match:
                value = float(match.group(1))
                return value / 100 if value > 1 else value
        
        return 0.5  # Default moderate confidence
    
    def extract_safety_level_from_analysis(self, analysis_text):
        """Extract safety assessment from moderation analysis"""
        analysis_lower = analysis_text.lower()
        
        if any(word in analysis_lower for word in ['harmful', 'high risk', 'remove', 'violation']):
            return "harmful"
        elif any(word in analysis_lower for word in ['concerning', 'medium risk', 'review', 'warning']):
            return "concerning"
        elif any(word in analysis_lower for word in ['safe', 'low risk', 'acceptable', 'approve']):
            return "safe"
        else:
            return "review_needed"
    
    def comprehensive_analysis(self, text):
        """Run all agents with prompt-based analysis"""
        start_time = datetime.now()
        
        # Run core agents
        detection_result = self.detect_hate_speech(text)
        sentiment_result = self.analyze_sentiment(text)
        
        # Run prompt-based agents
        moderation_result = self.moderate_content_with_prompts(text, detection_result, sentiment_result)
        counter_speech = self.generate_counter_speech_with_prompts(text, detection_result, sentiment_result)
        
        processing_time = (datetime.now() - start_time).total_seconds()
        
        return {
            "detection": detection_result,
            "sentiment": sentiment_result,
            "moderation": moderation_result,
            "counter_speech": counter_speech,
            "processing_time": processing_time,
            "timestamp": datetime.now().isoformat()
        }

# Initialize the system
# NOTE: this runs at import time. Constructing PromptBasedMultiAgentSystem
# loads both prompt files and calls initialize_agents(), which sets up all
# four agents. The resulting module-level `agent_system` is shared by the
# handler functions defined below.
logger.info("🚀 Starting Prompt-Based Multi-Agent System...")
agent_system = PromptBasedMultiAgentSystem()

def analyze_text_with_prompts(text):
    """Analyze ``text`` with the shared agent system and shape the results for display.

    Returns a 5-tuple: detection status message, per-label detection scores,
    counter-speech text, a markdown summary of all agents, and a nested dict
    with the full agent data. Empty or whitespace-only input short-circuits
    with placeholder values.
    """
    if not (text and text.strip()):
        return (
            "❌ Please enter some text to analyze.",
            {},
            "No analysis performed.",
            "No input provided",
            {}
        )

    # Run every agent once and pull out the per-agent sections.
    results = agent_system.comprehensive_analysis(text)

    detection_status = results["detection"]["status"]
    detection_scores = results["detection"]["all_scores"]
    counter_speech = results["counter_speech"]

    detection = results['detection']
    sentiment = results['sentiment']
    moderation = results['moderation']

    # The counter-speech text itself reveals which path produced it.
    ai_generated = "AI-Generated" in counter_speech
    response_kind = "Custom prompt-based" if ai_generated else "Template-based"
    analysis_preview = moderation.get('analysis', 'No detailed analysis available')[:200]

    # Markdown summary: leading blank line, five agent lines, a blank line,
    # the moderation preview, and a trailing newline.
    summary_lines = [
        "",
        f"🔍 **Detection Agent**: {detection['risk_level']} risk ({detection['confidence']:.2%} confidence)",
        f"📊 **Sentiment Agent**: {sentiment['sentiment'].title()} ({sentiment['confidence']:.2%} confidence)",
        f"🛡️ **Moderation Agent**: {moderation.get('safety_level', 'unknown').title()} safety level ({moderation.get('confidence', 0):.2%} confidence)",
        f"💬 **Counter-Speech Agent**: {response_kind} response",
        f"⏱️ **Processing Time**: {results['processing_time']:.3f} seconds",
        "",
        f"📋 **Moderation Analysis**: {analysis_preview}...",
        "",
    ]
    agent_summary = "\n".join(summary_lines)

    # Full structured data, one section per agent plus system info.
    detection_section = {
        "scores": detection_scores,
        "risk_level": detection['risk_level'],
        "is_hate_speech": detection['is_hate_speech']
    }
    sentiment_section = {
        "primary_sentiment": sentiment['sentiment'],
        "all_sentiments": sentiment.get("all_sentiments", {})
    }
    moderation_section = {
        "safety_assessment": moderation.get('safety_level', 'unknown'),
        "detailed_analysis": moderation.get('analysis', ''),
        "confidence": moderation.get('confidence', 0),
        "prompt_used": moderation.get('prompt_used', 'fallback')
    }
    counter_speech_section = {
        "response": counter_speech,
        "generation_method": "AI-Generated" if ai_generated else "Template-based"
    }
    system_section = {
        "timestamp": results["timestamp"],
        "processing_time_seconds": results["processing_time"],
        "prompt_files_loaded": {
            "counter_speech": bool(agent_system.counter_speech_prompts),
            "moderation": bool(agent_system.moderation_prompts)
        }
    }
    all_agent_data = {
        "Detection_Analysis": detection_section,
        "Sentiment_Analysis": sentiment_section,
        "Moderation_Analysis": moderation_section,
        "Counter_Speech": counter_speech_section,
        "System_Info": system_section
    }

    return detection_status, detection_scores, counter_speech, agent_summary, all_agent_data

def reload_prompts():
    """Re-read both prompt files into the shared agent system.

    Returns a status message: success, or the error text if reloading raised.
    """
    # (attribute on agent_system, file it is loaded from), in reload order.
    prompt_sources = (
        ("counter_speech_prompts", "counter_speech_prompts.json"),
        ("moderation_prompts", "moderation_prompts.json"),
    )
    try:
        for attribute, filename in prompt_sources:
            setattr(agent_system, attribute, agent_system.load_prompts(filename))
    except Exception as e:
        return f"❌ Error reloading prompts: {e}"
    return "✅ Prompts reloaded successfully!"

def get_prompt_info():
    """Summarise the prompt data currently held by the agent system.

    Returns:
        A dict holding the number of entries found under the top-level
        "counter_speech_prompts" and "moderation_prompts" keys of the loaded
        prompt data, plus a status string per prompt file. A count of zero is
        reported as "❌ Not found".
    """
    def _status(category_count):
        # Any non-empty category mapping counts as a successful load.
        if category_count > 0:
            return "✅ Loaded"
        return "❌ Not found"

    n_counter = len(agent_system.counter_speech_prompts.get("counter_speech_prompts", {}))
    n_moderation = len(agent_system.moderation_prompts.get("moderation_prompts", {}))

    file_status = {
        "counter_speech_prompts.json": _status(n_counter),
        "moderation_prompts.json": _status(n_moderation),
    }
    return {
        "counter_speech_prompt_categories": n_counter,
        "moderation_prompt_categories": n_moderation,
        "prompt_files_status": file_status,
    }

# Create the Gradio interface
# The Blocks app is bound to `demo`, which is what gets launched at the bottom
# of the file. The custom CSS caps the page width and defines two highlight-box
# classes; of these, only "agent-summary" is referenced by a component in this
# part of the file (the agent summary textbox).
with gr.Blocks(
    title="Prompt-Based Multi-Agent Hate Speech Detection System",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1400px !important;
    }
    .prompt-info {
        background: linear-gradient(90deg, #f0f9ff 0%, #e0f2fe 100%);
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #0284c7;
    }
    .agent-summary {
        background: linear-gradient(90deg, #fefce8 0%, #fef3c7 100%);
        padding: 1rem;
        border-radius: 8px;
        border-left: 4px solid #f59e0b;
    }
    """
) as demo:
    
    # Page header. The four agent lines end in two trailing spaces on purpose:
    # that is Markdown's hard line break, keeping each agent on its own line.
    gr.Markdown("""
    # 🤖 Prompt-Based Multi-Agent Hate Speech Detection System
    
    **Advanced AI Agent Collaboration with Custom Prompts**
    
    🔍 **Detection Agent** - Your fine-tuned DistilBERT model  
    💬 **Counter-Speech Agent** - FLAN-T5 with custom prompt engineering  
    🛡️ **Moderation Agent** - Structured analysis using specialized prompts  
    📊 **Sentiment Agent** - Twitter-RoBERTa for emotional context  
    
    *Each agent uses carefully crafted prompts from external JSON files for optimal performance.*
    """)
    
    # Main analysis tab: input box, action buttons and examples on top, the
    # per-agent results below, and the full JSON payload in the last row.
    # The components created here are wired to their callbacks in the
    # "Event handlers" section after the tabs.
    with gr.Tab("🤖 Prompt-Based Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                # Free-text input passed to the analysis callback.
                text_input = gr.Textbox(
                    label="Enter text for comprehensive prompt-based analysis",
                    placeholder="Enter text here to see how prompt-engineered AI agents collaborate...",
                    lines=5,
                    max_lines=15
                )
                
                with gr.Row():
                    analyze_btn = gr.Button("🚀 Run Prompt-Based Analysis", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
                    reload_btn = gr.Button("🔄 Reload Prompts", variant="secondary")
                
                # Sample inputs; each example is a one-element row targeting text_input.
                gr.Examples(
                    examples=[
                        ["This is a wonderful day to collaborate and learn from each other!"],
                        ["I appreciate everyone's different perspectives and backgrounds."],
                        ["Let's work together to build a more inclusive community."],
                        ["Thank you for sharing your experience. I'd love to understand your viewpoint better."],
                        ["The diversity in our group makes our discussions much richer and more meaningful."],
                        ["I respectfully disagree, but I value your right to express your opinion."]
                    ],
                    inputs=text_input,
                    label="📝 Try these examples with prompt-based agents:"
                )
        
        # Read-only result widgets, filled in by the analyze/clear handlers.
        with gr.Row():
            with gr.Column():
                detection_output = gr.Textbox(
                    label="🎯 Primary Detection Result",
                    interactive=False,
                    lines=3
                )
                
                # Styled through the ".agent-summary" rule in the Blocks CSS.
                agent_summary = gr.Textbox(
                    label="🤖 Prompt-Based Agent Summary",
                    interactive=False,
                    lines=8,
                    elem_classes=["agent-summary"]
                )
                
            with gr.Column():
                counter_speech_output = gr.Textbox(
                    label="💬 Prompt-Generated Counter-Speech",
                    interactive=False,
                    lines=6
                )
                
                # Receives the status string returned by reload_prompts.
                reload_status = gr.Textbox(
                    label="🔄 Prompt Reload Status",
                    interactive=False,
                    lines=2
                )
        
        # Full structured result from the analysis callback.
        with gr.Row():
            all_agents_output = gr.JSON(
                label="📊 Complete Prompt-Based Multi-Agent Analysis",
                visible=True
            )
    
    # Prompt management tab: static descriptions of the prompt categories, a
    # snapshot of the loaded prompt configuration, and the expected JSON file
    # layout. The Markdown blocks are plain (non-f) strings, so placeholders
    # such as {original_text} and {text} are shown literally.
    with gr.Tab("📝 Prompt Management"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
                ## 📝 Counter-Speech Prompts
                
                The system uses specialized prompts for different risk levels:
                
                ### 🚨 High Risk Prompts
                - **Purpose**: Address clear hate speech with educational responses
                - **Tone**: Firm but respectful, educational focus
                - **Length**: 50-100 words
                - **Goal**: De-escalation and education
                
                ### ⚠️ Medium Risk Prompts
                - **Purpose**: Handle potentially problematic content
                - **Tone**: Gentle guidance, supportive
                - **Length**: 40-80 words
                - **Goal**: Reflection and improvement
                
                ### ⚡ Low Risk Prompts
                - **Purpose**: Encourage even better communication
                - **Tone**: Positive reinforcement
                - **Length**: 30-60 words
                - **Goal**: Enhancement and encouragement
                """)
                
            with gr.Column():
                gr.Markdown("""
                ## 🛡️ Moderation Prompts
                
                Structured analysis prompts for comprehensive assessment:
                
                ### 🔍 Comprehensive Analysis
                - **Safety Assessment**: SAFE/CONCERNING/HARMFUL
                - **Violation Categories**: Specific policy areas
                - **Severity Levels**: LOW/MEDIUM/HIGH
                - **Confidence Scoring**: 0-100% certainty
                - **Contextual Factors**: Cultural and situational
                
                ### 📊 Specialized Analysis Types
                - **Hate Speech Focus**: Protected group targeting
                - **Toxicity Assessment**: Discourse quality impact
                - **Context Analysis**: Cultural and situational factors
                - **Action Recommendations**: Specific moderation steps
                """)
        
        # NOTE: get_prompt_info() is called once, while the layout is being
        # built. None of the handlers registered below write to this
        # component, so it is not refreshed when "Reload Prompts" is clicked.
        with gr.Row():
            prompt_info_output = gr.JSON(
                label="📋 Current Prompt Configuration",
                value=get_prompt_info()
            )
        
        gr.Markdown("""
        ## 📁 Prompt File Structure
        
        To customize the system behavior, create these JSON files:
        
        ### `counter_speech_prompts.json`
        ```json
        {
          "counter_speech_prompts": {
            "high_risk": {
              "system_prompt": "You are an expert educator...",
              "user_prompt_template": "Generate response for: {original_text}..."
            }
          }
        }
        ```
        
        ### `moderation_prompts.json`
        ```json
        {
          "moderation_prompts": {
            "comprehensive_analysis": {
              "system_prompt": "You are a content moderation expert...",
              "user_prompt_template": "Analyze: {text}..."
            }
          }
        }
        ```
        
        **Benefits of External Prompts:**
        - 🎯 **Fine-tuned control** over agent behavior
        - 🔄 **Easy iteration** without code changes
        - 📊 **A/B testing** of different prompt strategies
        - 🎨 **Domain-specific customization** for different platforms
        - 📈 **Performance optimization** through prompt engineering
        """)
    
    # Architecture tab: static, documentation-only Markdown. It creates no
    # interactive components and nothing here is wired to a callback.
    with gr.Tab("🔧 System Architecture"):
        gr.Markdown("""
        ## 🏗️ Prompt-Based Agent Architecture
        
        ### 🔄 Agent Collaboration Flow
        ```
        Input Text
        ├── Detection Agent → Risk Classification (DistilBERT)
        ├── Sentiment Agent → Emotional Context (RoBERTa)
        ├── Moderation Agent → Structured Analysis (FLAN-T5 + Prompts)
        └── Counter-Speech Agent → Educational Response (FLAN-T5 + Prompts)

        Uses custom prompts and outputs from all other agents
        ```
        
        ### 📝 Prompt Engineering Advantages
        
        #### 🎯 **Precision Control**
        - **Task-specific instructions** for each scenario
        - **Tone and style guidelines** for appropriate responses
        - **Length and format specifications** for consistency
        - **Context integration** from multiple agent outputs
        
        #### 🔄 **Iterative Improvement**
        - **Hot-swappable prompts** without system restart
        - **A/B testing capabilities** for prompt effectiveness
        - **Performance metrics** tracking for optimization
        - **Domain adaptation** for different use cases
        
        #### 🛡️ **Quality Assurance**
        - **Bias mitigation** through careful prompt design
        - **Safety guardrails** built into prompt structure
        - **Consistency enforcement** across all responses
        - **Cultural sensitivity** considerations
        
        ### 🚀 Production Benefits
        
        - **🎨 Customizable**: Adapt to different platforms and communities
        - **📈 Scalable**: Easy to add new prompt categories
        - **🔧 Maintainable**: Update behavior without code deployment
        - **📊 Measurable**: Track prompt performance and effectiveness
        - **🌍 Localizable**: Different prompts for different regions/cultures
        
        ### ⚠️ Deployment Considerations
        
        #### 🔒 Security
        - **Prompt injection protection** for user inputs
        - **Content filtering** on generated responses
        - **Rate limiting** to prevent abuse
        - **Audit logging** for compliance
        
        #### 📊 Monitoring
        - **Response quality metrics** tracking
        - **User feedback integration** for continuous improvement
        - **Error rate monitoring** across different prompt types
        - **Performance benchmarking** against baseline models
        
        #### 👥 Human Oversight
        - **Expert review processes** for prompt updates
        - **Community feedback loops** for prompt effectiveness
        - **Escalation pathways** for edge cases
        - **Regular bias audits** and prompt refinement
        """)
    
    # Event handlers
    def _run_analysis_for_ui(text):
        """Run the analysis and return one value per distinct output component.

        analyze_text_with_prompts ends by returning five values, the second of
        which is the detection-scores mapping. The layout has no dedicated
        component for those scores, and they are already included in the full
        agent data under "Detection_Analysis" -> "scores". They are dropped
        here so the JSON panel is written exactly once per click, instead of
        receiving the scores and then the full agent data in the same update.

        Args:
            text: The contents of the input textbox.

        Returns:
            A 4-tuple (detection status, counter-speech text, agent summary,
            full agent data) matching the `outputs` list of the click handler.
        """
        (
            detection_status,
            _detection_scores,
            counter_speech,
            summary,
            all_agent_data,
        ) = analyze_text_with_prompts(text)
        return detection_status, counter_speech, summary, all_agent_data

    analyze_btn.click(
        fn=_run_analysis_for_ui,
        inputs=text_input,
        outputs=[detection_output, counter_speech_output, agent_summary, all_agents_output]
    )

    # Reset the input and every analysis output; the values are positional and
    # line up one-to-one with the components in `outputs`.
    clear_btn.click(
        fn=lambda: ("", "", "", "", {}),
        outputs=[text_input, detection_output, counter_speech_output, agent_summary, all_agents_output]
    )

    # Show the status string returned by reload_prompts.
    reload_btn.click(
        fn=reload_prompts,
        outputs=reload_status
    )

# Launch configuration
if __name__ == "__main__":
    # Only start the server when the file is executed as a script.
    # "0.0.0.0" binds to all network interfaces rather than just localhost.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "share": False,
    }
    demo.launch(**launch_options)