File size: 33,612 Bytes
1ebd84a
 
a50d483
 
c02b5d2
1ebd84a
 
 
5cbe56c
a50d483
1ebd84a
c02b5d2
 
a50d483
 
 
1ebd84a
a50d483
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
59a4329
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
 
fd21bc1
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
27d1197
 
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
27d1197
fd21bc1
 
933510e
fd21bc1
 
 
 
 
 
 
 
933510e
fd21bc1
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
27d1197
fd21bc1
27d1197
fd21bc1
 
27d1197
fd21bc1
 
27d1197
fd21bc1
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
 
 
fd21bc1
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
27d1197
 
 
 
fd21bc1
 
 
 
 
 
27d1197
fd21bc1
27d1197
fd21bc1
 
 
 
 
27d1197
 
 
 
fd21bc1
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
 
27d1197
 
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
27d1197
 
 
 
fd21bc1
 
27d1197
fd21bc1
27d1197
fd21bc1
 
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
27d1197
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933510e
fd21bc1
27d1197
fd21bc1
 
 
27d1197
 
 
 
fd21bc1
 
27d1197
fd21bc1
 
27d1197
fd21bc1
27d1197
fd21bc1
 
 
 
27d1197
fd21bc1
27d1197
fd21bc1
 
 
27d1197
fd21bc1
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
27d1197
 
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
27d1197
fd21bc1
 
 
 
27d1197
 
 
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3be032
 
fd21bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27d1197
fd21bc1
 
 
 
 
 
27d1197
fd21bc1
 
 
 
27d1197
fd21bc1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
import gradio as gr
import torch
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, EulerAncestralDiscreteScheduler
from PIL import Image, PngImagePlugin, ImageFilter
from datetime import datetime
import os
import gc
import time
import spaces
from typing import Optional, Tuple, Dict, Any
from huggingface_hub import hf_hub_download
import tempfile
import random
import logging
import torch.nn.functional as F
from transformers import CLIPProcessor, CLIPModel

# Configure logging
# basicConfig runs at import time and sets the root logger to INFO; `logger`
# is the module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# Hub repository and checkpoint file passed to hf_hub_download() in
# PipelineManager.load_models().
MODEL_REPO = "ajsbsd/CyberRealistic-Pony"
MODEL_FILENAME = "cyberrealisticPony_v110.safetensors"
NSFW_MODEL_ID = "openai/clip-vit-base-patch32"  # CLIP checkpoint loaded by load_nsfw_detector()
MAX_SEED = 2**32 - 1  # Inclusive upper bound used when a random seed is drawn
# Run on the GPU when one is visible; half precision is only selected on CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
# Minimum mean cosine similarity to the "unsafe" text prompts required (in
# addition to beating the "safe" similarity) before an image is flagged.
NSFW_THRESHOLD = 0.25

# Global pipeline state
class PipelineManager:
    """Owns the lazily loaded SDXL pipelines and the CLIP model used for NSFW screening.

    Nothing is loaded in ``__init__``; ``load_models`` and ``load_nsfw_detector``
    populate the attributes on first use and the ``*_loaded`` flags turn later
    calls into cheap no-ops.
    """

    # Confidence values reported by the keyword fallback. They are fixed so the
    # same prompt always produces the same result (and the same PNG metadata).
    _FALLBACK_FLAGGED_CONFIDENCE = 0.8
    _FALLBACK_CLEAN_CONFIDENCE = 0.2

    def __init__(self):
        self.txt2img_pipe = None
        self.img2img_pipe = None
        self.nsfw_detector_model = None
        self.nsfw_detector_processor = None
        self.model_loaded = False
        self.nsfw_detector_loaded = False

    def clear_memory(self):
        """Free unreferenced Python objects, then release cached CUDA memory."""
        # Collect first: empty_cache() can only hand back blocks that are no
        # longer occupied, so garbage has to be reclaimed before it runs.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

    def load_nsfw_detector(self) -> bool:
        """Load the CLIP processor and model used for NSFW detection.

        Returns:
            True when the detector is (already) loaded, False when loading failed.
            Failures are logged, never raised.
        """
        if self.nsfw_detector_loaded:
            return True

        try:
            logger.info("Loading NSFW detector...")
            self.nsfw_detector_processor = CLIPProcessor.from_pretrained(NSFW_MODEL_ID)
            self.nsfw_detector_model = CLIPModel.from_pretrained(NSFW_MODEL_ID, use_safetensors=True)

            if DEVICE == "cuda":
                self.nsfw_detector_model = self.nsfw_detector_model.to(DEVICE)

            self.nsfw_detector_loaded = True
            logger.info("NSFW detector loaded successfully!")
            return True

        except Exception as e:
            logger.error(f"Failed to load NSFW detector: {e}")
            self.nsfw_detector_loaded = False
            return False

    def is_nsfw(self, image: "Image.Image", prompt: str = "") -> Tuple[bool, float]:
        """Classify ``image`` as NSFW via CLIP zero-shot similarity.

        The image embedding is compared against a set of "safe" and a set of
        "unsafe" text prompts. The image is flagged when the mean unsafe
        similarity exceeds both the mean safe similarity and ``NSFW_THRESHOLD``.

        Args:
            image: The image to screen.
            prompt: Generation prompt, used only by the keyword fallback.

        Returns:
            ``(flagged, confidence)``. With the CLIP path, ``confidence`` is the
            mean cosine similarity of the winning prompt group. If the detector
            cannot be loaded or raises, the result of
            ``_fallback_nsfw_detection(prompt)`` is returned instead.
        """
        try:
            # Load NSFW detector if not already loaded
            if not self.nsfw_detector_loaded:
                if not self.load_nsfw_detector():
                    # If NSFW detector cannot be loaded, fall back to prompt-based
                    return self._fallback_nsfw_detection(prompt)

            # CLIP-based NSFW detection
            inputs = self.nsfw_detector_processor(images=image, return_tensors="pt").to(DEVICE)

            with torch.no_grad():
                image_features = self.nsfw_detector_model.get_image_features(**inputs)

                # Text prompts describing the two classes
                safe_prompts = [
                    "a safe family-friendly image",
                    "a general photo",
                    "appropriate content",
                    "artistic photography"
                ]
                unsafe_prompts = [
                    "explicit adult content",
                    "nudity",
                    "inappropriate sexual content",
                    "pornographic material"
                ]

                safe_inputs = self.nsfw_detector_processor(
                    text=safe_prompts, return_tensors="pt", padding=True
                ).to(DEVICE)
                unsafe_inputs = self.nsfw_detector_processor(
                    text=unsafe_prompts, return_tensors="pt", padding=True
                ).to(DEVICE)

                safe_features = self.nsfw_detector_model.get_text_features(**safe_inputs)
                unsafe_features = self.nsfw_detector_model.get_text_features(**unsafe_inputs)

                # L2-normalise so the matrix products below are cosine similarities
                image_features = F.normalize(image_features, p=2, dim=-1)
                safe_features = F.normalize(safe_features, p=2, dim=-1)
                unsafe_features = F.normalize(unsafe_features, p=2, dim=-1)

                # Average similarity of the image to each prompt group
                safe_similarity = (image_features @ safe_features.T).mean().item()
                unsafe_similarity = (image_features @ unsafe_features.T).mean().item()

                is_nsfw_result = (
                    unsafe_similarity > safe_similarity and
                    unsafe_similarity > NSFW_THRESHOLD
                )

                confidence = unsafe_similarity if is_nsfw_result else safe_similarity

                if is_nsfw_result:
                    logger.warning(f"🚨 NSFW content detected (CLIP-based: {unsafe_similarity:.3f} > {safe_similarity:.3f})")

                return is_nsfw_result, confidence

        except Exception as e:
            logger.error(f"NSFW detection error (CLIP model failed): {e}")
            # Fallback to prompt-based detection if CLIP model encounters an error
            return self._fallback_nsfw_detection(prompt)

    def _fallback_nsfw_detection(self, prompt: str = "") -> Tuple[bool, float]:
        """Keyword-based NSFW check used when the CLIP detector is unavailable.

        The check is a case-insensitive substring match against a fixed keyword
        list and is fully deterministic: clean prompts are never flagged at
        random, and the reported confidence is a fixed constant per outcome.

        Returns:
            ``(True, _FALLBACK_FLAGGED_CONFIDENCE)`` when a keyword occurs in the
            prompt, otherwise ``(False, _FALLBACK_CLEAN_CONFIDENCE)``.
        """
        nsfw_keywords = (
            'nude', 'naked', 'nsfw', 'explicit', 'sexual', 'erotic', 'porn',
            'adult', 'xxx', 'sex', 'breast', 'nipple', 'genital', 'provocative'
        )

        prompt_lower = (prompt or "").lower()
        for keyword in nsfw_keywords:
            if keyword in prompt_lower:
                logger.warning(f"🚨 NSFW content detected (prompt-based: '{keyword}' found)")
                return True, self._FALLBACK_FLAGGED_CONFIDENCE

        return False, self._FALLBACK_CLEAN_CONFIDENCE

    def load_models(self) -> bool:
        """Download the checkpoint and build the txt2img and img2img pipelines.

        The img2img pipeline is constructed from the txt2img pipeline's
        components, so the weights are only held once.

        Returns:
            True when the pipelines are (already) loaded, False when loading
            failed. Failures are logged, never raised.
        """
        if self.model_loaded:
            return True

        try:
            logger.info("Loading CyberRealistic Pony models...")

            model_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename=MODEL_FILENAME,
                cache_dir=os.environ.get("HF_CACHE_DIR", "/tmp/hf_cache"),
                resume_download=True
            )
            logger.info(f"Model downloaded to: {model_path}")

            # Load txt2img pipeline with optimizations
            self.txt2img_pipe = StableDiffusionXLPipeline.from_single_file(
                model_path,
                torch_dtype=DTYPE,
                use_safetensors=True,
                variant="fp16" if DEVICE == "cuda" else None,
                safety_checker=None,  # Disable for faster loading, using custom NSFW check
                requires_safety_checker=False
            )

            # The generation functions record the sampler as "Euler Ancestral" in
            # their metadata, so install that scheduler (reusing the checkpoint's
            # scheduler config) instead of silently keeping the default one.
            self.txt2img_pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
                self.txt2img_pipe.scheduler.config
            )

            # Apply memory optimizations to txt2img pipeline
            self._optimize_pipeline(self.txt2img_pipe)

            # Create img2img pipeline sharing components
            self.img2img_pipe = StableDiffusionXLImg2ImgPipeline(
                vae=self.txt2img_pipe.vae,
                text_encoder=self.txt2img_pipe.text_encoder,
                text_encoder_2=self.txt2img_pipe.text_encoder_2,
                tokenizer=self.txt2img_pipe.tokenizer,
                tokenizer_2=self.txt2img_pipe.tokenizer_2,
                unet=self.txt2img_pipe.unet,
                scheduler=self.txt2img_pipe.scheduler,
            )

            # Apply memory optimizations to img2img pipeline
            self._optimize_pipeline(self.img2img_pipe)

            self.model_loaded = True
            logger.info("Models loaded successfully!")
            return True

        except Exception as e:
            logger.error(f"Failed to load models: {e}")
            # Drop any half-built pipeline so its memory can be reclaimed and a
            # later retry starts from a clean state.
            self.txt2img_pipe = None
            self.img2img_pipe = None
            self.model_loaded = False
            return False

    def _optimize_pipeline(self, pipeline):
        """Enable memory-saving features on ``pipeline`` (modified in place)."""
        pipeline.enable_attention_slicing()
        pipeline.enable_vae_slicing()

        if DEVICE == "cuda":
            # Use sequential CPU offloading for better memory management on GPU
            pipeline.enable_sequential_cpu_offload()
            # Enable memory efficient attention if xformers is available
            try:
                pipeline.enable_xformers_memory_efficient_attention()
            except Exception:  # Any failure here is treated as "xformers unavailable"
                logger.info("xformers not available, using default attention")
        else:
            # No CUDA: keep the whole pipeline on the CPU
            pipeline.to(DEVICE)

# Global pipeline manager instance
# Constructing it loads nothing; models are loaded on the first generation call.
pipe_manager = PipelineManager()

# Enhanced prompt templates
# Tag string that enhance_prompt() prepends to prompts lacking quality tags.
QUALITY_TAGS = "score_9, score_8_up, score_7_up, masterpiece, best quality, ultra detailed, 8k"

# Negative prompt used by the generation functions when the caller supplies
# none. NOTE: the literal spans several lines, so it contains line breaks.
DEFAULT_NEGATIVE = """(worst quality:1.4), (low quality:1.4), (normal quality:1.2), 
lowres, bad anatomy, bad hands, signature, watermarks, ugly, imperfect eyes, 
skewed eyes, unnatural face, unnatural body, error, extra limb, missing limbs, 
painting by bad-artist, 3d, render"""

# Pool of sample prompts that get_random_prompt() draws from.
EXAMPLE_PROMPTS = [
    "beautiful anime girl with long flowing silver hair, sakura petals, soft morning light",
    "cyberpunk street scene, neon lights reflecting on wet pavement, futuristic cityscape",
    "majestic dragon soaring through storm clouds, lightning, epic fantasy scene",
    "cute anthropomorphic fox girl, fluffy tail, forest clearing, magical sparkles",
    "elegant Victorian lady in ornate dress, portrait, vintage photography style",
    "futuristic mech suit, glowing energy core, sci-fi laboratory background",
    "mystical unicorn with rainbow mane, enchanted forest, ethereal atmosphere",
    "steampunk inventor's workshop, brass gears, mechanical contraptions, warm lighting"
]

def enhance_prompt(prompt: str, add_quality: bool = True) -> str:
    """
    Return the prompt with the standard quality tags prepended.

    A blank prompt yields an empty string. The prompt is returned untouched
    when it already mentions a quality marker (checked case-insensitively) or
    when ``add_quality`` is false.
    """
    if prompt.strip() == "":
        return ""

    lowered = prompt.lower()
    already_tagged = False
    for marker in ("score_", "masterpiece", "best quality"):
        if marker in lowered:
            already_tagged = True
            break

    # Nothing to add: either the caller opted out or the tags are present.
    if already_tagged or not add_quality:
        return prompt

    return f"{QUALITY_TAGS}, {prompt}"

def validate_and_fix_dimensions(width: int, height: int) -> Tuple[int, int]:
    """
    Snap the requested size to SDXL-compatible dimensions.

    Each side is truncated to an integer, rounded to a multiple of 64 (an exact
    half-way value rounds down) and clamped to the range 512..1024. Because
    both sides end up in that range, the aspect ratio is always between 1:2
    and 2:1, so no separate aspect-ratio correction is needed.

    Args:
        width: Requested width in pixels; floats are accepted and truncated.
        height: Requested height in pixels; floats are accepted and truncated.

    Returns:
        ``(width, height)`` as plain ints.
    """
    def _snap(value) -> int:
        # int() guards against float inputs, which would otherwise propagate
        # as float dimensions to the caller.
        snapped = ((int(value) + 31) // 64) * 64
        return max(512, min(1024, snapped))

    return _snap(width), _snap(height)

def create_metadata_png(image: Image.Image, params: Dict[str, Any]) -> str:
    """
    Save ``image`` as a temporary PNG with the generation parameters embedded.

    Every non-None entry of ``params`` is written as a PNG text chunk (values
    are stringified), followed by a ``Generated`` UTC timestamp and a ``Model``
    identifier.

    Args:
        image: The image to save.
        params: Generation parameters to embed as text chunks.

    Returns:
        Path of the PNG file. The file is not deleted by this function.

    Raises:
        Whatever ``image.save`` raises; the temporary file is removed first.
    """
    # Local import: the file only imports the `datetime` class itself.
    from datetime import timezone

    # mkstemp creates the file atomically, unlike the deprecated, race-prone
    # mktemp which only returns a name.
    fd, temp_path = tempfile.mkstemp(suffix=".png", prefix="cyberrealistic_")
    os.close(fd)  # Only the path is needed; the image is saved by path below

    meta = PngImagePlugin.PngInfo()
    for key, value in params.items():
        if value is not None:
            meta.add_text(key, str(value))

    # The timestamp is labelled UTC, so take it from a UTC clock
    meta.add_text("Generated", datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"))
    meta.add_text("Model", f"{MODEL_REPO}/{MODEL_FILENAME}")

    try:
        image.save(temp_path, "PNG", pnginfo=meta, optimize=True)
    except Exception:
        # Do not leave an empty/partial file behind when saving fails
        os.unlink(temp_path)
        raise
    return temp_path

def format_generation_info(params: Dict[str, Any], generation_time: float) -> str:
    """
    Build the multi-line, human-readable summary shown after a generation.

    Args:
        params: Generation metadata. Reads ``prompt``, ``negative_prompt``,
            ``width``, ``height``, ``seed``, ``steps``, ``guidance_scale`` and,
            when present, ``strength``. Missing numeric entries are shown as
            ``N/A``; a missing, empty or ``None`` negative prompt is shown as
            ``None``.
        generation_time: Elapsed generation time in seconds.

    Returns:
        The summary lines joined with newlines. The prompt is truncated to 60
        characters and the negative prompt to 40, each followed by ``...``
        when truncated.
    """
    def _clip(text: str, limit: int) -> str:
        # Shorten long text for display and mark the cut with an ellipsis
        return f"{text[:limit]}..." if len(text) > limit else text

    # `or` also covers keys that are present but None, which cannot be sliced
    prompt = str(params.get('prompt') or '')
    negative = str(params.get('negative_prompt') or 'None')

    info_lines = [
        f"✅ Generated in {generation_time:.1f}s",
        f"📐 Resolution: {params.get('width', 'N/A')}×{params.get('height', 'N/A')}",
        f"🎯 Prompt: {_clip(prompt, 60)}",
        f"🚫 Negative: {_clip(negative, 40)}",
        f"🎲 Seed: {params.get('seed', 'N/A')}",
        f"📊 Steps: {params.get('steps', 'N/A')} | CFG: {params.get('guidance_scale', 'N/A')}"
    ]

    if 'strength' in params:
        info_lines.append(f"💪 Strength: {params['strength']}")

    return "\n".join(info_lines)

@spaces.GPU(duration=120)  # Increased duration for model loading and generation
def generate_txt2img(prompt: str, negative_prompt: str, steps: int, guidance_scale: float,
                    width: int, height: int, seed: int, add_quality: bool) -> Tuple:
    """
    Generate an image from a text prompt.

    Loads the models on first use, normalises the parameters, runs the
    txt2img pipeline, screens the result for NSFW content (blurring it when
    flagged) and writes a PNG with the generation metadata embedded.

    Args:
        prompt: Text prompt; a blank prompt is rejected.
        negative_prompt: Negative prompt; ``DEFAULT_NEGATIVE`` is used when empty.
        steps: Inference steps, clamped to 10..50.
        guidance_scale: CFG scale, clamped to 1.0..20.0.
        width: Requested width, normalised by ``validate_and_fix_dimensions``.
        height: Requested height, normalised by ``validate_and_fix_dimensions``.
        seed: RNG seed; ``-1`` requests a random seed.
        add_quality: Whether ``enhance_prompt`` may prepend quality tags.

    Returns:
        ``(image, png_path, info_text)``. On any failure the first two items
        are ``None`` and ``info_text`` carries the error message.
    """
    if not prompt.strip():
        return None, None, "❌ Please enter a prompt."

    # Lazy load models if not already loaded
    if not pipe_manager.load_models():
        return None, None, "❌ Failed to load model. Please try again."

    try:
        pipe_manager.clear_memory()  # Clear memory before generation

        # Numeric inputs may arrive as floats; the seed, step count and image
        # size must be ints for the generator and the pipeline.
        width, height = validate_and_fix_dimensions(int(width), int(height))
        seed = int(seed)
        if seed == -1:
            seed = random.randint(0, MAX_SEED)

        enhanced_prompt = enhance_prompt(prompt, add_quality)
        generator = torch.Generator(device=DEVICE).manual_seed(seed)

        gen_params = {
            "prompt": enhanced_prompt,
            "negative_prompt": negative_prompt or DEFAULT_NEGATIVE,
            "num_inference_steps": min(max(int(steps), 10), 50),  # Clamp steps to a reasonable range
            "guidance_scale": max(1.0, min(guidance_scale, 20.0)),  # Clamp guidance scale
            "width": width,
            "height": height,
            "generator": generator,
            "output_type": "pil"
        }

        logger.info(f"Generating: {enhanced_prompt[:50]}...")
        start_time = time.time()

        with torch.inference_mode():
            result = pipe_manager.txt2img_pipe(**gen_params)

        generation_time = time.time() - start_time
        output_image = result.images[0]

        # Perform NSFW detection on the generated image
        is_nsfw_result, nsfw_confidence = pipe_manager.is_nsfw(output_image, enhanced_prompt)

        # Metadata is identical for both outcomes; the NSFW markers are
        # appended below only when the image was flagged.
        metadata = {
            "prompt": enhanced_prompt,
            "negative_prompt": gen_params["negative_prompt"],
            "steps": gen_params["num_inference_steps"],
            "guidance_scale": gen_params["guidance_scale"],
            "width": width,
            "height": height,
            "seed": seed,
            "sampler": "Euler Ancestral",
            "model_hash": "cyberrealistic_pony_v110"
        }

        warning_msg = None
        if is_nsfw_result:
            # Only the blurred version is returned and saved
            output_image = output_image.filter(ImageFilter.GaussianBlur(radius=20))
            warning_msg = f"⚠️ Content flagged as potentially inappropriate (confidence: {nsfw_confidence:.2f}). Image has been blurred."
            metadata["nsfw_filtered"] = "true"
            metadata["nsfw_confidence"] = f"{nsfw_confidence:.3f}"

        png_path = create_metadata_png(output_image, metadata)
        info_text = format_generation_info(metadata, generation_time)
        if warning_msg is not None:
            info_text = f"{warning_msg}\n\n{info_text}"

        return output_image, png_path, info_text

    except torch.cuda.OutOfMemoryError:
        return None, None, "❌ GPU out of memory. Try smaller dimensions or fewer steps."
    except Exception as e:
        # logger.exception keeps the traceback, which the UI message omits
        logger.exception(f"Generation error: {e}")
        return None, None, f"❌ Generation failed: {str(e)}"
    finally:
        pipe_manager.clear_memory()  # Ensure memory is cleared even if an error occurs

@spaces.GPU(duration=120) # Increased duration for model loading and generation
def generate_img2img(input_image: Image.Image, prompt: str, negative_prompt: str,
                    steps: int, guidance_scale: float, strength: float, seed: int,
                    add_quality: bool) -> Tuple:
    """
    Transform an input image according to a text prompt.

    Loads the models on first use, converts and resizes the input image,
    normalises the parameters, runs the img2img pipeline, screens the result
    for NSFW content (blurring it when flagged) and writes a PNG with the
    generation metadata embedded. The caller's image object is not modified.

    Args:
        input_image: Source image; ``None`` is rejected.
        prompt: Text prompt; a blank prompt is rejected.
        negative_prompt: Negative prompt; ``DEFAULT_NEGATIVE`` is used when empty.
        steps: Inference steps, clamped to 10..50.
        guidance_scale: CFG scale, clamped to 1.0..20.0.
        strength: Transformation strength, clamped to 0.1..1.0.
        seed: RNG seed; ``-1`` requests a random seed.
        add_quality: Whether ``enhance_prompt`` may prepend quality tags.

    Returns:
        ``(image, png_path, info_text)``. On any failure the first two items
        are ``None`` and ``info_text`` carries the error message.
    """
    if input_image is None:
        return None, None, "❌ Please upload an input image."

    if not prompt.strip():
        return None, None, "❌ Please enter a prompt."

    # Lazy load models if not already loaded
    if not pipe_manager.load_models():
        return None, None, "❌ Failed to load model. Please try again."

    try:
        pipe_manager.clear_memory()  # Clear memory before generation

        # Process input image: convert to RGB if necessary
        if input_image.mode != 'RGB':
            input_image = input_image.convert('RGB')

        # Shrink oversized inputs while keeping the aspect ratio. thumbnail()
        # works in place, so operate on a copy to leave the caller's image intact.
        max_dimension = 1024
        if max(input_image.size) > max_dimension:
            input_image = input_image.copy()
            input_image.thumbnail((max_dimension, max_dimension), Image.Resampling.LANCZOS)

        # Ensure SDXL compatible dimensions (multiples of 64)
        w, h = validate_and_fix_dimensions(*input_image.size)
        input_image = input_image.resize((w, h), Image.Resampling.LANCZOS)

        # Numeric inputs may arrive as floats; the seed and step count must be ints
        seed = int(seed)
        if seed == -1:
            seed = random.randint(0, MAX_SEED)

        enhanced_prompt = enhance_prompt(prompt, add_quality)
        generator = torch.Generator(device=DEVICE).manual_seed(seed)

        gen_params = {
            "prompt": enhanced_prompt,
            "negative_prompt": negative_prompt or DEFAULT_NEGATIVE,
            "image": input_image,
            "num_inference_steps": min(max(int(steps), 10), 50),  # Clamp steps
            "guidance_scale": max(1.0, min(guidance_scale, 20.0)),  # Clamp guidance scale
            "strength": max(0.1, min(strength, 1.0)),  # Clamp strength
            "generator": generator,
            "output_type": "pil"
        }

        logger.info(f"Transforming: {enhanced_prompt[:50]}...")
        start_time = time.time()

        with torch.inference_mode():
            result = pipe_manager.img2img_pipe(**gen_params)

        generation_time = time.time() - start_time
        output_image = result.images[0]

        # Perform NSFW detection on the transformed image
        is_nsfw_result, nsfw_confidence = pipe_manager.is_nsfw(output_image, enhanced_prompt)

        # Metadata is identical for both outcomes; the NSFW markers are
        # appended below only when the image was flagged.
        metadata = {
            "prompt": enhanced_prompt,
            "negative_prompt": gen_params["negative_prompt"],
            "steps": gen_params["num_inference_steps"],
            "guidance_scale": gen_params["guidance_scale"],
            "strength": gen_params["strength"],
            "width": w,
            "height": h,
            "seed": seed,
            "sampler": "Euler Ancestral",
            "model_hash": "cyberrealistic_pony_v110"
        }

        warning_msg = None
        if is_nsfw_result:
            # Only the blurred version is returned and saved
            output_image = output_image.filter(ImageFilter.GaussianBlur(radius=20))
            warning_msg = f"⚠️ Content flagged as potentially inappropriate (confidence: {nsfw_confidence:.2f}). Image has been blurred."
            metadata["nsfw_filtered"] = "true"
            metadata["nsfw_confidence"] = f"{nsfw_confidence:.3f}"

        png_path = create_metadata_png(output_image, metadata)
        info_text = format_generation_info(metadata, generation_time)
        if warning_msg is not None:
            info_text = f"{warning_msg}\n\n{info_text}"

        return output_image, png_path, info_text

    except torch.cuda.OutOfMemoryError:
        return None, None, "❌ GPU out of memory. Try lower strength or fewer steps."
    except Exception as e:
        # logger.exception keeps the traceback, which the UI message omits
        logger.exception(f"Generation error: {e}")
        return None, None, f"❌ Generation failed: {str(e)}"
    finally:
        pipe_manager.clear_memory()  # Ensure memory is cleared even if an error occurs

def get_random_prompt():
    """Pick one entry of EXAMPLE_PROMPTS uniformly at random and return it."""
    selected = random.choice(EXAMPLE_PROMPTS)
    return selected

# Enhanced Gradio interface
def create_interface():
    """
    Build the Gradio Blocks UI for the CyberRealistic Pony Generator.

    The layout has two tabs:

    * Text to Image - prompt, advanced settings (negative prompt, quality
      tags, steps, CFG scale, width, height, seed) and a generate button
      wired to ``generate_txt2img``.
    * Image to Image - input image, prompt, advanced settings (negative
      prompt, quality tags, steps, CFG, strength, seed) and a transform
      button wired to ``generate_img2img``.

    Each tab shows three outputs: the resulting image, a downloadable PNG
    file and a read-only info textbox. Both tabs also have a "Random" button
    (fills the prompt via ``get_random_prompt``) and a "Clear" button
    (empties the prompt).

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        queueing and launching it.
    """

    # NOTE(review): several emoji in the labels below were restored from
    # mis-encoded (mojibake) text. The lightning bolt in the header and the
    # ruler glyph on the Width/Height sliders could not be recovered
    # unambiguously - confirm them against the intended design.
    with gr.Blocks(
        title="CyberRealistic Pony - SDXL Generator",
        theme=gr.themes.Soft(primary_hue="blue"),
        css="""
        .generate-btn { 
            background: linear-gradient(45deg, #667eea 0%, #764ba2 100%) !important;
            border: none !important;
        }
        .generate-btn:hover {
            transform: translateY(-2px);
            box-shadow: 0 4px 12px rgba(0,0,0,0.2);
        }
        """
    ) as demo:
        
        gr.Markdown("""
        # 🎨 CyberRealistic Pony Generator
        
        **High-quality SDXL image generation** • Optimized for HuggingFace Spaces • **NSFW Content Filter Enabled**
        
        > ⚡ **First generation takes longer** (model loading) • 📋 **Metadata embedded** in all outputs • 🛡️ **Content filtered for safety**
        """)
        
        with gr.Tabs():
            # Text to Image Tab
            with gr.TabItem("🎨 Text to Image", id="txt2img"):
                with gr.Row():
                    # Left column: prompt, settings and the generate button
                    with gr.Column(scale=1):
                        with gr.Group():
                            txt_prompt = gr.Textbox(
                                label="✨ Prompt",
                                placeholder="A beautiful landscape with mountains and sunset...",
                                lines=3,
                                max_lines=5
                            )
                            
                            with gr.Row():
                                txt_example_btn = gr.Button("🎲 Random", size="sm")
                                txt_clear_btn = gr.Button("🗑️ Clear", size="sm")
                        
                        with gr.Accordion("⚙️ Advanced Settings", open=False):
                            txt_negative = gr.Textbox(
                                label="❌ Negative Prompt",
                                value=DEFAULT_NEGATIVE,
                                lines=2,
                                max_lines=3
                            )
                            
                            txt_quality = gr.Checkbox(
                                label="✨ Add Quality Tags",
                                value=True,
                                info="Automatically enhance prompt with quality tags"
                            )
                            
                            with gr.Row():
                                txt_steps = gr.Slider(
                                    10, 50, 25, step=1,
                                    label="📊 Steps",
                                    info="More steps = better quality, slower generation"
                                )
                                txt_guidance = gr.Slider(
                                    1.0, 15.0, 7.5, step=0.5,
                                    label="🎛️ CFG Scale",
                                    info="How closely to follow the prompt"
                                )
                            
                            with gr.Row():
                                txt_width = gr.Slider(
                                    512, 1024, 768, step=64,
                                    label="📐 Width"
                                )
                                txt_height = gr.Slider(
                                    512, 1024, 768, step=64,
                                    label="📐 Height"
                                )
                            
                            # -1 is the sentinel the UI advertises for "random seed"
                            txt_seed = gr.Slider(
                                -1, MAX_SEED, -1, step=1,
                                label="🎲 Seed (-1 = random)",
                                info="Use same seed for reproducible results"
                            )
                        
                        txt_generate_btn = gr.Button(
                            "🎨 Generate Image",
                            variant="primary",
                            size="lg",
                            elem_classes=["generate-btn"]
                        )
                    
                    # Right column: outputs
                    with gr.Column(scale=1):
                        txt_output_image = gr.Image(
                            label="🖼️ Generated Image",
                            height=500,
                            show_download_button=True
                        )
                        txt_download_file = gr.File(
                            label="📥 Download PNG (with metadata)",
                            file_types=[".png"]
                        )
                        txt_info = gr.Textbox(
                            label="ℹ️ Generation Info",
                            lines=6,
                            max_lines=8,
                            interactive=False
                        )
            
            # Image to Image Tab
            with gr.TabItem("🖼️ Image to Image", id="img2img"):
                with gr.Row():
                    # Left column: input image, prompt, settings and the transform button
                    with gr.Column(scale=1):
                        img_input = gr.Image(
                            label="📤 Input Image",
                            type="pil",
                            height=300
                        )
                        
                        with gr.Group():
                            img_prompt = gr.Textbox(
                                label="✨ Transformation Prompt",
                                placeholder="digital art style, vibrant colors...",
                                lines=3
                            )
                            
                            with gr.Row():
                                img_example_btn = gr.Button("🎲 Random", size="sm")
                                img_clear_btn = gr.Button("🗑️ Clear", size="sm")
                        
                        with gr.Accordion("⚙️ Advanced Settings", open=False):
                            img_negative = gr.Textbox(
                                label="❌ Negative Prompt",
                                value=DEFAULT_NEGATIVE,
                                lines=2
                            )
                            
                            img_quality = gr.Checkbox(
                                label="✨ Add Quality Tags",
                                value=True
                            )
                            
                            with gr.Row():
                                img_steps = gr.Slider(10, 50, 25, step=1, label="📊 Steps")
                                img_guidance = gr.Slider(1.0, 15.0, 7.5, step=0.5, label="🎛️ CFG")
                            
                            img_strength = gr.Slider(
                                0.1, 1.0, 0.75, step=0.05,
                                label="💪 Transformation Strength",
                                info="Higher = more creative, lower = more faithful to input"
                            )
                            
                            img_seed = gr.Slider(-1, MAX_SEED, -1, step=1, label="🎲 Seed")
                        
                        img_generate_btn = gr.Button(
                            "🖼️ Transform Image",
                            variant="primary",
                            size="lg",
                            elem_classes=["generate-btn"]
                        )
                    
                    # Right column: outputs
                    with gr.Column(scale=1):
                        img_output_image = gr.Image(
                            label="🖼️ Transformed Image",
                            height=500,
                            show_download_button=True
                        )
                        img_download_file = gr.File(
                            label="📥 Download PNG (with metadata)",
                            file_types=[".png"]
                        )
                        img_info = gr.Textbox(
                            label="ℹ️ Generation Info",
                            lines=6,
                            interactive=False
                        )
        
        # Event handlers: the order of `inputs` must match the positional
        # parameters of the generate functions, and `outputs` their
        # (image, png_path, info_text) return triple.
        txt_generate_btn.click(
            fn=generate_txt2img,
            inputs=[txt_prompt, txt_negative, txt_steps, txt_guidance, 
                   txt_width, txt_height, txt_seed, txt_quality],
            outputs=[txt_output_image, txt_download_file, txt_info],
            show_progress=True
        )
        
        img_generate_btn.click(
            fn=generate_img2img,
            inputs=[img_input, img_prompt, img_negative, img_steps, img_guidance,
                   img_strength, img_seed, img_quality],
            outputs=[img_output_image, img_download_file, img_info],
            show_progress=True
        )
        
        # Example prompt buttons
        txt_example_btn.click(fn=get_random_prompt, outputs=[txt_prompt])
        img_example_btn.click(fn=get_random_prompt, outputs=[img_prompt])
        
        # Clear buttons
        txt_clear_btn.click(lambda: "", outputs=[txt_prompt])
        img_clear_btn.click(lambda: "", outputs=[img_prompt])
    
    return demo

# Initialize and launch the Gradio application
if __name__ == "__main__":
    # Startup banner: record the compute device, the PyTorch build and the
    # content-filter state in the log.
    logger.info(f"🚀 Initializing CyberRealistic Pony Generator on {DEVICE}")
    logger.info(f"📱 PyTorch version: {torch.__version__}")
    logger.info("🛡️ NSFW Content Filter: Enabled")
    
    demo = create_interface()
    demo.queue(max_size=20)  # Queue requests; at most 20 may wait at once
    demo.launch(
        server_name="0.0.0.0",  # Listen on all interfaces
        server_port=7860,
        show_error=True,
        # share=True asks Gradio for a temporary public tunnel link when
        # running locally. A hosted Space is already public and Gradio does
        # not support share links there, so keep this False.
        share=False
    )