Aatricks committed
Commit 8a58a6e · verified · 1 Parent(s): f83908e

Upload folder using huggingface_hub

.dockerignore ADDED
@@ -0,0 +1,84 @@
1
+ # Python cache files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ share/python-wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+
25
+ # Virtual environments
26
+ .env
27
+ .venv
28
+ env/
29
+ venv/
30
+ ENV/
31
+ env.bak/
32
+ venv.bak/
33
+
34
+ # IDE files
35
+ .vscode/
36
+ .idea/
37
+ *.swp
38
+ *.swo
39
+ *~
40
+
41
+ # OS files
42
+ .DS_Store
43
+ .DS_Store?
44
+ ._*
45
+ .Spotlight-V100
46
+ .Trashes
47
+ ehthumbs.db
48
+ Thumbs.db
49
+
50
+ # Git
51
+ .git/
52
+ .gitignore
53
+
54
+ # Docker
55
+ Dockerfile*
56
+ .dockerignore
57
+ docker-compose*.yml
58
+
59
+ # Documentation
60
+ *.md
61
+ docs/
62
+
63
+ # Large model files (these should be downloaded at runtime)
64
+ *.safetensors
65
+ *.ckpt
66
+ *.pt
67
+ *.pth
68
+ *.bin
69
+ *.gguf
70
+
71
+ # Logs
72
+ *.log
73
+ logs/
74
+
75
+ # Temporary files
76
+ tmp/
77
+ temp/
78
+ *.tmp
79
+
80
+ # Generated images (these will be created at runtime)
81
+ _internal/output/
82
+
83
+ # Large dependencies that will be installed via pip
84
+ stable_fast-*.whl
Dockerfile ADDED
@@ -0,0 +1,87 @@
1
+ # Use Python 3.10 base image
2
+ FROM python:3.10-slim-bullseye
3
+
4
+ # Set environment variables
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV PYTHONUNBUFFERED=1
7
+ ENV PYTHONDONTWRITEBYTECODE=1
8
+
9
+ # Install system dependencies
10
+ RUN apt-get update && apt-get install -y \
11
+ python3-dev \
12
+ python3-venv \
13
+ python3-pip \
14
+ python3-tk \
15
+ git \
16
+ wget \
17
+ curl \
18
+ build-essential \
19
+ libgl1-mesa-glx \
20
+ libglib2.0-0 \
21
+ libsm6 \
22
+ libxext6 \
23
+ libxrender-dev \
24
+ libgomp1 \
25
+ software-properties-common \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+
29
+ # Set working directory
30
+ WORKDIR /app
31
+
32
+ # Copy requirements first to leverage Docker cache
33
+ COPY requirements.txt .
34
+
35
+ # Upgrade pip and install uv for faster package installation
36
+ RUN python3 -m pip install --upgrade pip
37
+ RUN python3 -m pip install uv
38
+
39
+ # Install PyTorch with CUDA support
40
+ RUN python3 -m uv pip install --system --index-url https://download.pytorch.org/whl/cu128 \
41
+ torch torchvision "triton>=2.1.0"
42
+
43
+ # Install numpy with version constraint
44
+ RUN python3 -m uv pip install --system "numpy<2.0.0"
45
+
46
+ # Install Python dependencies
47
+ RUN python3 -m uv pip install --system -r requirements.txt
48
+
49
+ # Copy the entire project
50
+ COPY . .
51
+
52
+ # Create necessary directories
53
+ RUN mkdir -p ./_internal/output/classic \
54
+ ./_internal/output/Flux \
55
+ ./_internal/output/HiresFix \
56
+ ./_internal/output/Img2Img \
57
+ ./_internal/output/Adetailer \
58
+ ./_internal/checkpoints \
59
+ ./_internal/clip \
60
+ ./_internal/embeddings \
61
+ ./_internal/ESRGAN \
62
+ ./_internal/loras \
63
+ ./_internal/sd1_tokenizer \
64
+ ./_internal/unet \
65
+ ./_internal/vae \
66
+ ./_internal/vae_approx \
67
+ ./_internal/yolos
68
+
69
+ # Create last_seed.txt if it doesn't exist
70
+ RUN echo "42" > ./_internal/last_seed.txt
71
+
72
+ # Create prompt.txt if it doesn't exist
73
+ RUN echo "A beautiful landscape" > ./_internal/prompt.txt
74
+
75
+ # Expose the port that Gradio will run on
76
+ EXPOSE 7860
77
+
78
+ # Set environment variable to indicate this is running in a container
79
+ ENV GRADIO_SERVER_NAME=0.0.0.0
80
+ ENV GRADIO_SERVER_PORT=7860
81
+
82
+ # Health check
83
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
84
+ CMD curl -f http://localhost:7860/ || exit 1
85
+
86
+ # Run the Gradio app
87
+ CMD ["python3", "app.py"]
README.md CHANGED
@@ -2,7 +2,7 @@
2
  title: LightDiffusion-Next
3
  app_file: app.py
4
  sdk: gradio
5
- sdk_version: 5.20.0
6
  ---
7
  <div align="center">
8
 
@@ -40,7 +40,7 @@ That's when the first version of LightDiffusion was born which only counted [300
40
 
41
  Advanced users can take advantage of features like **attention syntax**, **Hires-Fix** or **ADetailer**. These tools provide better quality and flexibility for generating complex and high-resolution outputs.
42
 
43
- **LightDiffusion-Next** is fine-tuned for **performance**. Features such as **Xformers** acceleration, **BFloat16** precision support, **WaveSpeed** dynamic caching, and **Stable-Fast** model compilation (which offers up to a 70% speed boost) ensure smooth and efficient operation, even on demanding workloads.
44
 
45
  ---
46
 
@@ -49,7 +49,7 @@ Advanced users can take advantage of features like **attention syntax**, **Hires
49
  Here’s what makes LightDiffusion-Next stand out:
50
 
51
  - **Speed and Efficiency**:
52
- Enjoy industry-leading performance with built-in Xformers, Pytorch, Wavespeed and Stable-Fast optimizations, achieving up to 30% faster speeds compared to the rest of the AI image generation backends in SD1.5 and up to 2x for Flux.
53
 
54
  - **Automatic Detailing**:
55
  Effortlessly enhance faces and body details with AI-driven tools based on the [Impact Pack](https://github.com/ltdrdata/ComfyUI-Impact-Pack).
@@ -109,6 +109,33 @@ With its unmatched speed and efficiency, LightDiffusion-Next sets the benchmark
109
  2. Run `run.bat` in a terminal.
110
  3. Start creating!
111
 
112
  ### Command-Line Pipeline
113
 
114
  For a GUI-free experience, use the pipeline:
 
2
  title: LightDiffusion-Next
3
  app_file: app.py
4
  sdk: gradio
5
+ sdk_version: 5.38.0
6
  ---
7
  <div align="center">
8
 
 
40
 
41
  Advanced users can take advantage of features like **attention syntax**, **Hires-Fix** or **ADetailer**. These tools provide better quality and flexibility for generating complex and high-resolution outputs.
42
 
43
+ **LightDiffusion-Next** is fine-tuned for **performance**. Features such as **Xformers** acceleration, **BFloat16** precision support, **WaveSpeed** dynamic caching, **Multi-scale diffusion**, and **Stable-Fast** model compilation (which offers up to a 70% speed boost) ensure smooth and efficient operation, even on demanding workloads.
44
 
45
  ---
46
 
 
49
  Here’s what makes LightDiffusion-Next stand out:
50
 
51
  - **Speed and Efficiency**:
52
+ Enjoy industry-leading performance with built-in Xformers, Pytorch, Wavespeed and Stable-Fast optimizations, plus Multi-scale diffusion, achieving up to 30% faster speeds than other AI image generation backends in SD1.5 and up to 2x for Flux.
53
 
54
  - **Automatic Detailing**:
55
  Effortlessly enhance faces and body details with AI-driven tools based on the [Impact Pack](https://github.com/ltdrdata/ComfyUI-Impact-Pack).
 
109
  2. Run `run.bat` in a terminal.
110
  3. Start creating!
111
 
112
+ ### 🐳 Docker Setup
113
+
114
+ Run LightDiffusion-Next in a containerized environment with GPU acceleration:
115
+
116
+ **Prerequisites:**
117
+ - Docker with NVIDIA Container Toolkit installed
118
+ - NVIDIA GPU with CUDA support
119
+
120
+ **Quick Start with Docker:**
121
+ ```bash
122
+ # Build and run with docker-compose (recommended)
123
+ docker-compose up --build
124
+
125
+ # Or build and run manually
126
+ docker build -t lightdiffusion-next .
127
+ docker run --gpus all -p 7860:7860 -v ./output:/app/_internal/output lightdiffusion-next
128
+ ```
129
+
130
+ **Access the Gradio Web Interface:**
131
+ Open your browser and navigate to `http://localhost:7860`
132
+
133
+ **Volume Mounts:**
134
+ - `./output:/app/_internal/output` - Persist generated images
135
+ - `./checkpoints:/app/_internal/checkpoints` - Store model files
136
+ - `./loras:/app/_internal/loras` - Store LoRA files
137
+ - `./embeddings:/app/_internal/embeddings` - Store embeddings
138
+
139
  ### Command-Line Pipeline
140
 
141
  For a GUI-free experience, use the pipeline:
app.py CHANGED
@@ -1,216 +1,372 @@
1
- import glob
2
- import gradio as gr
3
- import sys
4
- import os
5
- from PIL import Image
6
- import numpy as np
7
- import spaces
8
-
9
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
10
-
11
- from modules.user.pipeline import pipeline
12
- import torch
13
-
14
-
15
- def load_generated_images():
16
- """Load generated images with given prefix from disk"""
17
- image_files = glob.glob("./_internal/output/**/*.png")
18
-
19
- # If there are no image files, return
20
- if not image_files:
21
- return []
22
-
23
- # Sort files by modification time in descending order
24
- image_files.sort(key=os.path.getmtime, reverse=True)
25
-
26
- # Get most recent timestamp
27
- latest_time = os.path.getmtime(image_files[0])
28
-
29
- # Get all images from same batch (within 1 second of most recent)
30
- batch_images = []
31
- for file in image_files:
32
- if abs(os.path.getmtime(file) - latest_time) < 1.0:
33
- try:
34
- img = Image.open(file)
35
- batch_images.append(img)
36
- except:
37
- continue
38
-
39
- if not batch_images:
40
- return []
41
- return batch_images
42
-
43
-
44
- @spaces.GPU(duration=120)
45
- def generate_images(
46
- prompt: str,
47
- width: int = 512,
48
- height: int = 512,
49
- num_images: int = 1,
50
- batch_size: int = 1,
51
- hires_fix: bool = False,
52
- adetailer: bool = False,
53
- enhance_prompt: bool = False,
54
- img2img_enabled: bool = False,
55
- img2img_image: str = None,
56
- stable_fast: bool = False,
57
- reuse_seed: bool = False,
58
- flux_enabled: bool = False,
59
- prio_speed: bool = False,
60
- realistic_model: bool = False,
61
- progress=gr.Progress(),
62
- ):
63
- """Generate images using the LightDiffusion pipeline"""
64
- try:
65
- if img2img_enabled and img2img_image is not None:
66
- # Convert numpy array to PIL Image
67
- if isinstance(img2img_image, np.ndarray):
68
- img_pil = Image.fromarray(img2img_image)
69
- img_pil.save("temp_img2img.png")
70
- prompt = "temp_img2img.png"
71
-
72
- # Run pipeline and capture saved images
73
- with torch.inference_mode():
74
- pipeline(
75
- prompt=prompt,
76
- w=width,
77
- h=height,
78
- number=num_images,
79
- batch=batch_size,
80
- hires_fix=hires_fix,
81
- adetailer=adetailer,
82
- enhance_prompt=enhance_prompt,
83
- img2img=img2img_enabled,
84
- stable_fast=stable_fast,
85
- reuse_seed=reuse_seed,
86
- flux_enabled=flux_enabled,
87
- prio_speed=prio_speed,
88
- autohdr=True,
89
- realistic_model=realistic_model,
90
- )
91
-
92
- # Clean up temporary file if it exists
93
- if os.path.exists("temp_img2img.png"):
94
- os.remove("temp_img2img.png")
95
-
96
- return load_generated_images()
97
-
98
- except Exception:
99
- import traceback
100
-
101
- print(traceback.format_exc())
102
- # Clean up temporary file if it exists
103
- if os.path.exists("temp_img2img.png"):
104
- os.remove("temp_img2img.png")
105
- return [Image.new("RGB", (512, 512), color="black")]
106
-
107
-
108
- # Create Gradio interface
109
- with gr.Blocks(title="LightDiffusion Web UI") as demo:
110
- gr.Markdown("# LightDiffusion Web UI")
111
- gr.Markdown("Generate AI images using LightDiffusion")
112
- gr.Markdown(
113
- "This is the demo for LightDiffusion, the fastest diffusion backend for generating images. https://github.com/LightDiffusion/LightDiffusion-Next"
114
- )
115
-
116
- with gr.Row():
117
- with gr.Column():
118
- # Input components
119
- prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
120
-
121
- with gr.Row():
122
- width = gr.Slider(
123
- minimum=64, maximum=2048, value=512, step=64, label="Width"
124
- )
125
- height = gr.Slider(
126
- minimum=64, maximum=2048, value=512, step=64, label="Height"
127
- )
128
-
129
- with gr.Row():
130
- num_images = gr.Slider(
131
- minimum=1, maximum=10, value=1, step=1, label="Number of Images"
132
- )
133
- batch_size = gr.Slider(
134
- minimum=1, maximum=4, value=1, step=1, label="Batch Size"
135
- )
136
-
137
- with gr.Row():
138
- hires_fix = gr.Checkbox(label="HiRes Fix")
139
- adetailer = gr.Checkbox(label="Auto Face/Body Enhancement")
140
- enhance_prompt = gr.Checkbox(label="Enhance Prompt")
141
- stable_fast = gr.Checkbox(label="Stable Fast Mode")
142
-
143
- with gr.Row():
144
- reuse_seed = gr.Checkbox(label="Reuse Seed")
145
- flux_enabled = gr.Checkbox(label="Flux Mode")
146
- prio_speed = gr.Checkbox(label="Prioritize Speed")
147
- realistic_model = gr.Checkbox(label="Realistic Model")
148
-
149
- with gr.Row():
150
- img2img_enabled = gr.Checkbox(label="Image to Image Mode")
151
- img2img_image = gr.Image(label="Input Image for img2img", visible=False)
152
-
153
- # Make input image visible only when img2img is enabled
154
- img2img_enabled.change(
155
- fn=lambda x: gr.update(visible=x),
156
- inputs=[img2img_enabled],
157
- outputs=[img2img_image],
158
- )
159
-
160
- generate_btn = gr.Button("Generate")
161
-
162
- # Output gallery
163
- gallery = gr.Gallery(
164
- label="Generated Images",
165
- show_label=True,
166
- elem_id="gallery",
167
- columns=[2],
168
- rows=[2],
169
- object_fit="contain",
170
- height="auto",
171
- )
172
-
173
- # Connect generate button to pipeline
174
- generate_btn.click(
175
- fn=generate_images,
176
- inputs=[
177
- prompt,
178
- width,
179
- height,
180
- num_images,
181
- batch_size,
182
- hires_fix,
183
- adetailer,
184
- enhance_prompt,
185
- img2img_enabled,
186
- img2img_image,
187
- stable_fast,
188
- reuse_seed,
189
- flux_enabled,
190
- prio_speed,
191
- realistic_model,
192
- ],
193
- outputs=gallery,
194
- )
195
-
196
-
197
- def is_huggingface_space():
198
- return "SPACE_ID" in os.environ
199
-
200
-
201
- # For local testing
202
- if __name__ == "__main__":
203
- if is_huggingface_space():
204
- demo.launch(
205
- debug=False,
206
- server_name="0.0.0.0",
207
- server_port=7860, # Standard HF Spaces port
208
- )
209
- else:
210
- demo.launch(
211
- server_name="0.0.0.0",
212
- server_port=8000,
213
- auth=None,
214
- share=True, # Only enable sharing locally
215
- debug=True,
216
- )
1
+ import glob
2
+ import gradio as gr
3
+ import sys
4
+ import os
5
+ from PIL import Image
6
+ import numpy as np
7
+ import spaces
8
+
9
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
10
+
11
+ from modules.user.pipeline import pipeline
12
+ import torch
13
+
14
+
15
+ def load_generated_images():
16
+ """Load generated images with given prefix from disk"""
17
+ image_files = glob.glob("./_internal/output/**/*.png")
18
+
19
+ # If there are no image files, return
20
+ if not image_files:
21
+ return []
22
+
23
+ # Sort files by modification time in descending order
24
+ image_files.sort(key=os.path.getmtime, reverse=True)
25
+
26
+ # Get most recent timestamp
27
+ latest_time = os.path.getmtime(image_files[0])
28
+
29
+ # Get all images from same batch (within 1 second of most recent)
30
+ batch_images = []
31
+ for file in image_files:
32
+ if abs(os.path.getmtime(file) - latest_time) < 1.0:
33
+ try:
34
+ img = Image.open(file)
35
+ batch_images.append(img)
36
+ except:
37
+ continue
38
+
39
+ if not batch_images:
40
+ return []
41
+ return batch_images
42
+
43
+
44
+ @spaces.GPU
45
+ def generate_images(
46
+ prompt: str,
47
+ width: int = 512,
48
+ height: int = 512,
49
+ num_images: int = 1,
50
+ batch_size: int = 1,
51
+ hires_fix: bool = False,
52
+ adetailer: bool = False,
53
+ enhance_prompt: bool = False,
54
+ img2img_enabled: bool = False,
55
+ img2img_image: str = None,
56
+ stable_fast: bool = False,
57
+ reuse_seed: bool = False,
58
+ flux_enabled: bool = False,
59
+ prio_speed: bool = False,
60
+ realistic_model: bool = False,
61
+ multiscale_enabled: bool = True,
62
+ multiscale_intermittent: bool = False,
63
+ multiscale_factor: float = 0.5,
64
+ multiscale_fullres_start: int = 3,
65
+ multiscale_fullres_end: int = 8,
66
+ keep_models_loaded: bool = True,
67
+ progress=gr.Progress(),
68
+ ):
69
+ """Generate images using the LightDiffusion pipeline"""
70
+ try:
71
+ # Set model persistence preference
72
+ from modules.Device.ModelCache import set_keep_models_loaded
73
+
74
+ set_keep_models_loaded(keep_models_loaded)
75
+
76
+ if img2img_enabled and img2img_image is not None:
77
+ # Convert numpy array to PIL Image
78
+ if isinstance(img2img_image, np.ndarray):
79
+ img_pil = Image.fromarray(img2img_image)
80
+ img_pil.save("temp_img2img.png")
81
+ prompt = "temp_img2img.png"
82
+
83
+ # Run pipeline and capture saved images
84
+ with torch.inference_mode():
85
+ pipeline(
86
+ prompt=prompt,
87
+ w=width,
88
+ h=height,
89
+ number=num_images,
90
+ batch=batch_size,
91
+ hires_fix=hires_fix,
92
+ adetailer=adetailer,
93
+ enhance_prompt=enhance_prompt,
94
+ img2img=img2img_enabled,
95
+ stable_fast=stable_fast,
96
+ reuse_seed=reuse_seed,
97
+ flux_enabled=flux_enabled,
98
+ prio_speed=prio_speed,
99
+ autohdr=True,
100
+ realistic_model=realistic_model,
101
+ enable_multiscale=multiscale_enabled,
102
+ multiscale_intermittent_fullres=multiscale_intermittent,
103
+ multiscale_factor=multiscale_factor,
104
+ multiscale_fullres_start=multiscale_fullres_start,
105
+ multiscale_fullres_end=multiscale_fullres_end,
106
+ )
107
+
108
+ # Clean up temporary file if it exists
109
+ if os.path.exists("temp_img2img.png"):
110
+ os.remove("temp_img2img.png")
111
+
112
+ return load_generated_images()
113
+
114
+ except Exception:
115
+ import traceback
116
+
117
+ print(traceback.format_exc())
118
+ # Clean up temporary file if it exists
119
+ if os.path.exists("temp_img2img.png"):
120
+ os.remove("temp_img2img.png")
121
+ return [Image.new("RGB", (512, 512), color="black")]
122
+
123
+
124
+ def get_vram_info():
125
+ """Get VRAM usage information"""
126
+ try:
127
+ from modules.Device.ModelCache import get_memory_info
128
+
129
+ info = get_memory_info()
130
+ return f"""
131
+ **VRAM Usage:**
132
+ - Total: {info["total_vram"]:.1f} GB
133
+ - Used: {info["used_vram"]:.1f} GB
134
+ - Free: {info["free_vram"]:.1f} GB
135
+ - Keep Models Loaded: {info["keep_loaded"]}
136
+ - Has Cached Checkpoint: {info["has_cached_checkpoint"]}
137
+ """
138
+ except Exception as e:
139
+ return f"Error getting VRAM info: {e}"
140
+
141
+
142
+ def clear_model_cache_ui():
143
+ """Clear model cache from UI"""
144
+ try:
145
+ from modules.Device.ModelCache import clear_model_cache
146
+
147
+ clear_model_cache()
148
+ return "✅ Model cache cleared successfully!"
149
+ except Exception as e:
150
+ return f"❌ Error clearing cache: {e}"
151
+
152
+
153
+ def apply_multiscale_preset(preset_name):
154
+ """Apply multiscale preset values to the UI components"""
155
+ if preset_name == "None":
156
+ return gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
157
+
158
+ try:
159
+ from modules.sample.multiscale_presets import get_preset_parameters
160
+
161
+ params = get_preset_parameters(preset_name)
162
+
163
+ return (
164
+ gr.update(value=params["enable_multiscale"]),
165
+ gr.update(value=params["multiscale_factor"]),
166
+ gr.update(value=params["multiscale_fullres_start"]),
167
+ gr.update(value=params["multiscale_fullres_end"]),
168
+ gr.update(value=params["multiscale_intermittent_fullres"]),
169
+ )
170
+ except Exception as e:
171
+ print(f"Error applying preset {preset_name}: {e}")
172
+ return gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
173
+
174
+
175
+ # Create Gradio interface
176
+ with gr.Blocks(title="LightDiffusion Web UI") as demo:
177
+ gr.Markdown("# LightDiffusion Web UI")
178
+ gr.Markdown("Generate AI images using LightDiffusion")
179
+ gr.Markdown(
180
+ "This is the demo for LightDiffusion, the fastest diffusion backend for generating images. https://github.com/LightDiffusion/LightDiffusion-Next"
181
+ )
182
+
183
+ with gr.Row():
184
+ with gr.Column():
185
+ # Input components
186
+ prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
187
+
188
+ with gr.Row():
189
+ width = gr.Slider(
190
+ minimum=64, maximum=2048, value=512, step=64, label="Width"
191
+ )
192
+ height = gr.Slider(
193
+ minimum=64, maximum=2048, value=512, step=64, label="Height"
194
+ )
195
+
196
+ with gr.Row():
197
+ num_images = gr.Slider(
198
+ minimum=1, maximum=10, value=1, step=1, label="Number of Images"
199
+ )
200
+ batch_size = gr.Slider(
201
+ minimum=1, maximum=4, value=1, step=1, label="Batch Size"
202
+ )
203
+
204
+ with gr.Row():
205
+ hires_fix = gr.Checkbox(label="HiRes Fix")
206
+ adetailer = gr.Checkbox(label="Auto Face/Body Enhancement")
207
+ enhance_prompt = gr.Checkbox(label="Enhance Prompt")
208
+ stable_fast = gr.Checkbox(label="Stable Fast Mode")
209
+
210
+ with gr.Row():
211
+ reuse_seed = gr.Checkbox(label="Reuse Seed")
212
+ flux_enabled = gr.Checkbox(label="Flux Mode")
213
+ prio_speed = gr.Checkbox(label="Prioritize Speed")
214
+ realistic_model = gr.Checkbox(label="Realistic Model")
215
+
216
+ with gr.Row():
217
+ multiscale_enabled = gr.Checkbox(
218
+ label="Multi-Scale Diffusion", value=True
219
+ )
220
+ img2img_enabled = gr.Checkbox(label="Image to Image Mode")
221
+ keep_models_loaded = gr.Checkbox(
222
+ label="Keep Models in VRAM",
223
+ value=True,
224
+ info="Keep models loaded for instant reuse (faster but uses more VRAM)",
225
+ )
226
+
227
+ img2img_image = gr.Image(label="Input Image for img2img", visible=False)
228
+
229
+ # Multi-scale preset selection
230
+ with gr.Row():
231
+ multiscale_preset = gr.Dropdown(
232
+ label="Multi-Scale Preset",
233
+ choices=["None", "quality", "performance", "balanced", "disabled"],
234
+ value="None",
235
+ info="Select a preset to automatically configure multi-scale settings",
236
+ )
237
+ multiscale_intermittent = gr.Checkbox(
238
+ label="Intermittent Full-Res",
239
+ value=False,
240
+ info="Enable intermittent full-resolution rendering in low-res region",
241
+ )
242
+
243
+ with gr.Row():
244
+ multiscale_factor = gr.Slider(
245
+ minimum=0.1,
246
+ maximum=1.0,
247
+ value=0.5,
248
+ step=0.1,
249
+ label="Multi-Scale Factor",
250
+ )
251
+ multiscale_fullres_start = gr.Slider(
252
+ minimum=0, maximum=10, value=3, step=1, label="Full-Res Start Steps"
253
+ )
254
+ multiscale_fullres_end = gr.Slider(
255
+ minimum=0, maximum=20, value=8, step=1, label="Full-Res End Steps"
256
+ )
257
+
258
+ # Make input image visible only when img2img is enabled
259
+ img2img_enabled.change(
260
+ fn=lambda x: gr.update(visible=x),
261
+ inputs=[img2img_enabled],
262
+ outputs=[img2img_image],
263
+ )
264
+
265
+ # Handle preset changes
266
+ multiscale_preset.change(
267
+ fn=apply_multiscale_preset,
268
+ inputs=[multiscale_preset],
269
+ outputs=[
270
+ multiscale_enabled,
271
+ multiscale_factor,
272
+ multiscale_fullres_start,
273
+ multiscale_fullres_end,
274
+ multiscale_intermittent,
275
+ ],
276
+ )
277
+
278
+ generate_btn = gr.Button("Generate")
279
+
280
+ # Model Cache Management
281
+ with gr.Accordion("Model Cache Management", open=False):
282
+ with gr.Row():
283
+ vram_info_btn = gr.Button("🔍 Check VRAM Usage")
284
+ clear_cache_btn = gr.Button("🗑️ Clear Model Cache")
285
+ vram_info_display = gr.Markdown("")
286
+ cache_status_display = gr.Markdown("")
287
+
288
+ # Output gallery
289
+ gallery = gr.Gallery(
290
+ label="Generated Images",
291
+ show_label=True,
292
+ elem_id="gallery",
293
+ columns=[2],
294
+ rows=[2],
295
+ object_fit="contain",
296
+ height="auto",
297
+ )
298
+
299
+ # Connect generate button to pipeline
300
+ generate_btn.click(
301
+ fn=generate_images,
302
+ inputs=[
303
+ prompt,
304
+ width,
305
+ height,
306
+ num_images,
307
+ batch_size,
308
+ hires_fix,
309
+ adetailer,
310
+ enhance_prompt,
311
+ img2img_enabled,
312
+ img2img_image,
313
+ stable_fast,
314
+ reuse_seed,
315
+ flux_enabled,
316
+ prio_speed,
317
+ realistic_model,
318
+ multiscale_enabled,
319
+ multiscale_intermittent,
320
+ multiscale_factor,
321
+ multiscale_fullres_start,
322
+ multiscale_fullres_end,
323
+ keep_models_loaded,
324
+ ],
325
+ outputs=gallery,
326
+ )
327
+
328
+ # Connect VRAM info and cache management buttons
329
+ vram_info_btn.click(
330
+ fn=get_vram_info,
331
+ outputs=vram_info_display,
332
+ )
333
+
334
+ clear_cache_btn.click(
335
+ fn=clear_model_cache_ui,
336
+ outputs=cache_status_display,
337
+ )
338
+
339
+
340
+ def is_huggingface_space():
341
+ return "SPACE_ID" in os.environ
342
+
343
+
344
+ def is_docker_environment():
345
+ return "GRADIO_SERVER_PORT" in os.environ and "GRADIO_SERVER_NAME" in os.environ
346
+
347
+
348
+ # For local testing
349
+ if __name__ == "__main__":
350
+ if is_huggingface_space():
351
+ demo.launch(
352
+ debug=False,
353
+ server_name="0.0.0.0",
354
+ server_port=7860, # Standard HF Spaces port
355
+ )
356
+ elif is_docker_environment():
357
+ # Docker environment - use environment variables
358
+ server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
359
+ server_port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
360
+ demo.launch(
361
+ debug=False,
362
+ server_name=server_name,
363
+ server_port=server_port,
364
+ )
365
+ else:
366
+ demo.launch(
367
+ server_name="0.0.0.0",
368
+ server_port=8000,
369
+ auth=None,
370
+ share=True, # Only enable sharing locally
371
+ debug=True,
372
+ )
docker-compose.yml ADDED
@@ -0,0 +1,31 @@
1
+ services:
2
+ lightdiffusion:
3
+ build:
4
+ context: .
5
+ dockerfile: Dockerfile
6
+ ports:
7
+ - "7860:7860"
8
+ volumes:
9
+ # Mount output directory to persist generated images
10
+ - ./output:/app/_internal/output
11
+ # Mount checkpoints directory for model files
12
+ - ./checkpoints:/app/_internal/checkpoints
13
+ # Mount other model directories
14
+ - ./loras:/app/_internal/loras
15
+ - ./embeddings:/app/_internal/embeddings
16
+ - ./ESRGAN:/app/_internal/ESRGAN
17
+ - ./yolos:/app/_internal/yolos
18
+ environment:
19
+ - GRADIO_SERVER_NAME=0.0.0.0
20
+ - GRADIO_SERVER_PORT=7860
21
+ - CUDA_VISIBLE_DEVICES=0
22
+ deploy:
23
+ resources:
24
+ reservations:
25
+ devices:
26
+ - driver: nvidia
27
+ count: 1
28
+ capabilities: [gpu]
29
+ restart: unless-stopped
30
+ stdin_open: true
31
+ tty: true
modules/Device/ModelCache.py ADDED
@@ -0,0 +1,169 @@
1
+ """
2
+ Model Persistence Manager for LightDiffusion
3
+ Keeps models loaded in VRAM for instant reuse between generations
4
+ """
5
+
6
+ from typing import Dict, Optional, Any, Tuple, List
7
+ import logging
8
+ from modules.Device import Device
9
+
10
+
11
+ class ModelCache:
12
+ """Global model cache to keep models loaded in VRAM"""
13
+
14
+ def __init__(self):
15
+ self._cached_models: Dict[str, Any] = {}
16
+ self._cached_clip: Optional[Any] = None
17
+ self._cached_vae: Optional[Any] = None
18
+ self._cached_model_patcher: Optional[Any] = None
19
+ self._cached_conditions: Dict[str, Any] = {}
20
+ self._last_checkpoint_path: Optional[str] = None
21
+ self._keep_models_loaded: bool = True
22
+ self._loaded_models_list: List[Any] = []
23
+
24
+ def set_keep_models_loaded(self, keep_loaded: bool) -> None:
25
+ """Enable or disable keeping models loaded in VRAM"""
26
+ self._keep_models_loaded = keep_loaded
27
+ if not keep_loaded:
28
+ self.clear_cache()
29
+
30
+ def get_keep_models_loaded(self) -> bool:
31
+ """Check if models should be kept loaded"""
32
+ return self._keep_models_loaded
33
+
34
+ def cache_checkpoint(
35
+ self, checkpoint_path: str, model_patcher: Any, clip: Any, vae: Any
36
+ ) -> None:
37
+ """Cache a loaded checkpoint"""
38
+ if not self._keep_models_loaded:
39
+ return
40
+
41
+ self._last_checkpoint_path = checkpoint_path
42
+ self._cached_model_patcher = model_patcher
43
+ self._cached_clip = clip
44
+ self._cached_vae = vae
45
+ logging.info(f"Cached checkpoint: {checkpoint_path}")
46
+
47
+ def get_cached_checkpoint(
48
+ self, checkpoint_path: str
49
+ ) -> Optional[Tuple[Any, Any, Any]]:
50
+ """Get cached checkpoint if available"""
51
+ if not self._keep_models_loaded:
52
+ return None
53
+
54
+ if (
55
+ self._last_checkpoint_path == checkpoint_path
56
+ and self._cached_model_patcher is not None
57
+ and self._cached_clip is not None
58
+ and self._cached_vae is not None
59
+ ):
60
+ logging.info(f"Using cached checkpoint: {checkpoint_path}")
61
+ return self._cached_model_patcher, self._cached_clip, self._cached_vae
62
+ return None
63
+
64
+ def cache_sampling_models(self, models: List[Any]) -> None:
65
+ """Cache models used during sampling"""
66
+ if not self._keep_models_loaded:
67
+ return
68
+
69
+ self._loaded_models_list = models.copy()
70
+
71
+ def get_cached_sampling_models(self) -> List[Any]:
72
+ """Get cached sampling models"""
73
+ if not self._keep_models_loaded:
74
+ return []
75
+ return self._loaded_models_list
76
+
77
+ def prevent_model_cleanup(self, conds: Dict[str, Any], models: List[Any]) -> None:
78
+ """Prevent models from being cleaned up if caching is enabled"""
79
+ if not self._keep_models_loaded:
80
+ # Original cleanup behavior
81
+ from modules.cond import cond_util
82
+
83
+ cond_util.cleanup_additional_models(models)
84
+
85
+ control_cleanup = []
86
+ for k in conds:
87
+ control_cleanup += cond_util.get_models_from_cond(conds[k], "control")
88
+ cond_util.cleanup_additional_models(set(control_cleanup))
89
+ else:
90
+ # Keep models loaded - only cleanup control models that aren't main models
91
+ control_cleanup = []
92
+ for k in conds:
93
+ from modules.cond import cond_util
94
+
95
+ control_cleanup += cond_util.get_models_from_cond(conds[k], "control")
96
+
97
+ # Only cleanup control models, not the main models
98
+ from modules.cond import cond_util
99
+
100
+ cond_util.cleanup_additional_models(set(control_cleanup))
101
+ logging.info("Kept main models loaded in VRAM for reuse")
102
+
103
+ def clear_cache(self) -> None:
104
+ """Clear all cached models"""
105
+ if self._cached_model_patcher is not None:
106
+ try:
107
+ # Properly unload the cached models
108
+ if hasattr(self._cached_model_patcher, "model_unload"):
109
+ self._cached_model_patcher.model_unload()
110
+ except Exception as e:
111
+ logging.warning(f"Error unloading cached model: {e}")
112
+
113
+ self._cached_models.clear()
114
+ self._cached_clip = None
115
+ self._cached_vae = None
116
+ self._cached_model_patcher = None
117
+ self._cached_conditions.clear()
118
+ self._last_checkpoint_path = None
119
+ self._loaded_models_list.clear()
120
+
121
+ # Force cleanup
122
+ Device.cleanup_models(keep_clone_weights_loaded=False)
123
+ Device.soft_empty_cache(force=True)
124
+ logging.info("Cleared model cache and freed VRAM")
125
+
126
+ def get_memory_info(self) -> Dict[str, Any]:
127
+ """Get memory usage information"""
128
+ device = Device.get_torch_device()
129
+ total_mem = Device.get_total_memory(device)
130
+ free_mem = Device.get_free_memory(device)
131
+ used_mem = total_mem - free_mem
132
+
133
+ return {
134
+ "total_vram": total_mem / (1024 * 1024 * 1024), # GB
135
+ "used_vram": used_mem / (1024 * 1024 * 1024), # GB
136
+ "free_vram": free_mem / (1024 * 1024 * 1024), # GB
137
+ "cached_models": len(self._cached_models),
138
+ "keep_loaded": self._keep_models_loaded,
139
+ "has_cached_checkpoint": self._cached_model_patcher is not None,
140
+ }
141
+
142
+
143
+ # Global model cache instance
144
+ model_cache = ModelCache()
145
+
146
+
147
+ def get_model_cache() -> ModelCache:
148
+ """Get the global model cache instance"""
149
+ return model_cache
150
+
151
+
152
+ def set_keep_models_loaded(keep_loaded: bool) -> None:
153
+ """Global function to enable/disable model persistence"""
154
+ model_cache.set_keep_models_loaded(keep_loaded)
155
+
156
+
157
+ def get_keep_models_loaded() -> bool:
158
+ """Global function to check if models should be kept loaded"""
159
+ return model_cache.get_keep_models_loaded()
160
+
161
+
162
+ def clear_model_cache() -> None:
163
+ """Global function to clear model cache"""
164
+ model_cache.clear_cache()
165
+
166
+
167
+ def get_memory_info() -> Dict[str, Any]:
168
+ """Global function to get memory info"""
169
+ return model_cache.get_memory_info()
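
A brief, illustrative usage sketch (not part of this commit's code) of the module-level helpers exposed above, mirroring how `app.py` calls them; it assumes the repository root is on `sys.path` so the `modules` package is importable:

```python
# Illustrative sketch only — uses the helpers defined in modules/Device/ModelCache.py.
from modules.Device.ModelCache import (
    set_keep_models_loaded,
    get_memory_info,
    clear_model_cache,
)

# Keep checkpoints resident in VRAM between generations (the default).
set_keep_models_loaded(True)

# Inspect VRAM usage and cache state; values are reported in GB.
info = get_memory_info()
print(
    f"VRAM: {info['used_vram']:.1f}/{info['total_vram']:.1f} GB used, "
    f"cached checkpoint: {info['has_cached_checkpoint']}"
)

# Release cached models when VRAM is needed elsewhere.
clear_model_cache()
```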
modules/FileManaging/Loader.py CHANGED
@@ -128,6 +128,19 @@ class CheckpointLoaderSimple:
128
  - `tuple`: The model patcher, CLIP, and VAE.
129
  """
130
  ckpt_path = f"{ckpt_name}"
131
  out = load_checkpoint_guess_config(
132
  ckpt_path,
133
  output_vae=output_vae,
@@ -135,4 +148,9 @@ class CheckpointLoaderSimple:
135
  embedding_directory="./_internal/embeddings/",
136
  )
137
  print("loading", ckpt_path)
138
  return out[:3]
 
128
  - `tuple`: The model patcher, CLIP, and VAE.
129
  """
130
  ckpt_path = f"{ckpt_name}"
131
+
132
+ # Check if model is already cached
133
+ from modules.Device.ModelCache import get_model_cache
134
+
135
+ cache = get_model_cache()
136
+ cached_result = cache.get_cached_checkpoint(ckpt_path)
137
+
138
+ if cached_result is not None:
139
+ model_patcher, clip, vae = cached_result
140
+ print("using cached", ckpt_path)
141
+ return (model_patcher, clip, vae)
142
+
143
+ # Load normally if not cached
144
  out = load_checkpoint_guess_config(
145
  ckpt_path,
146
  output_vae=output_vae,
 
148
  embedding_directory="./_internal/embeddings/",
149
  )
150
  print("loading", ckpt_path)
151
+
152
+ # Cache the loaded checkpoint
153
+ if len(out) >= 3:
154
+ cache.cache_checkpoint(ckpt_path, out[0], out[1], out[2])
155
+
156
  return out[:3]
modules/UltimateSDUpscale/image_util.py CHANGED
@@ -140,9 +140,41 @@ def tensor_to_pil(img_tensor: torch.Tensor, batch_index: int = 0) -> Image.Image
140
  #### Returns:
141
  - `Image.Image`: The converted PIL image.
142
  """
143
- img_tensor = img_tensor[batch_index].unsqueeze(0)
144
- i = 255.0 * img_tensor.cpu().numpy()
145
- img = Image.fromarray(np.clip(i, 0, 255).astype(np.uint8).squeeze())
146
  return img
147
 
148
 
@@ -155,9 +187,20 @@ def pil_to_tensor(image: Image.Image) -> torch.Tensor:
155
  #### Returns:
156
  - `torch.Tensor`: The converted tensor.
157
  """
158
- image = np.array(image).astype(np.float32) / 255.0
159
- image = torch.from_numpy(image).unsqueeze(0)
160
- return image
161
 
162
 
163
  def get_crop_region(mask: Image.Image, pad: int = 0) -> tuple:
@@ -260,6 +303,6 @@ def crop_cond(cond: list, region: tuple, init_size: tuple, canvas_size: tuple, t
260
  cropped = []
261
  for emb, x in cond:
262
  cond_dict = x.copy()
263
- n = [emb, cond_dict]
264
- cropped.append(n)
265
- return cropped
 
140
  #### Returns:
141
  - `Image.Image`: The converted PIL image.
142
  """
143
+ # Get the tensor for the specified batch index
144
+ tensor = img_tensor[batch_index]
145
+
146
+ # Handle different tensor dimensions
147
+ # The upscaler outputs in [H, W, C] format after movedim(-3, -1)
148
+ if tensor.dim() == 3: # [H, W, C] - already in correct format
149
+ pass
150
+ elif tensor.dim() == 2: # [H, W] - grayscale
151
+ pass
152
+ else:
153
+ raise ValueError(f"Unexpected tensor dimensions: {tensor.shape}")
154
+
155
+ # Clamp values to valid range [0, 1] and convert to numpy
156
+ tensor = torch.clamp(tensor, 0.0, 1.0)
157
+ numpy_array = (tensor.cpu().numpy() * 255.0).astype(np.uint8)
158
+
159
+ # Handle different channel configurations
160
+ if numpy_array.ndim == 3:
161
+ if numpy_array.shape[2] == 3:
162
+ img = Image.fromarray(numpy_array, 'RGB')
163
+ elif numpy_array.shape[2] == 1:
164
+ img = Image.fromarray(numpy_array.squeeze(axis=2), 'L')
165
+ elif numpy_array.shape[2] == 4:
166
+ img = Image.fromarray(numpy_array, 'RGBA')
167
+ else:
168
+ # Fallback: take first 3 channels if more than 3, or convert single channel to grayscale
169
+ if numpy_array.shape[2] >= 3:
170
+ img = Image.fromarray(numpy_array[:, :, :3], 'RGB')
171
+ else:
172
+ img = Image.fromarray(numpy_array.squeeze(axis=2), 'L')
173
+ elif numpy_array.ndim == 2:
174
+ img = Image.fromarray(numpy_array, 'L')
175
+ else:
176
+ raise ValueError(f"Cannot convert array with shape {numpy_array.shape} to PIL image")
177
+
178
  return img
179
 
180
 
 
187
  #### Returns:
188
  - `torch.Tensor`: The converted tensor.
189
  """
190
+ # Convert RGBA to RGB if necessary (upscaler models expect 3 channels)
191
+ if image.mode == 'RGBA':
192
+ # Create a white background for transparency
193
+ background = Image.new('RGB', image.size, (255, 255, 255))
194
+ background.paste(image, mask=image.split()[-1]) # Use alpha channel as mask
195
+ image = background
196
+ elif image.mode != 'RGB':
197
+ image = image.convert('RGB')
198
+ # Convert to numpy array and normalize
199
+ image_array = np.array(image).astype(np.float32) / 255.0
200
+
201
+ # Convert to tensor and add batch dimension: [H, W, C] -> [1, H, W, C]
202
+ tensor = torch.from_numpy(image_array).unsqueeze(0)
203
+ return tensor
204
 
205
 
206
  def get_crop_region(mask: Image.Image, pad: int = 0) -> tuple:
 
303
  cropped = []
304
  for emb, x in cond:
305
  cond_dict = x.copy()
306
+ cond_entry = [emb, cond_dict]
307
+ cropped.append(cond_entry)
308
+ return cropped
modules/sample/CFG.py CHANGED
@@ -347,7 +347,10 @@ class CFGGuider:
347
  pipeline=pipeline,
348
  )
349
 
350
- cond_util.cleanup_models(self.conds, self.loaded_models)
351
  del self.inner_model
352
  del self.conds
353
  del self.loaded_models
 
347
  pipeline=pipeline,
348
  )
349
 
350
+ # Use model cache to prevent cleanup if models should stay loaded
351
+ from modules.Device.ModelCache import get_model_cache
352
+ get_model_cache().prevent_model_cleanup(self.conds, self.loaded_models)
353
+
354
  del self.inner_model
355
  del self.conds
356
  del self.loaded_models
modules/sample/multiscale_presets.py ADDED
@@ -0,0 +1,143 @@
1
+ """
2
+ Multi-scale diffusion presets for quality and performance optimization.
3
+
4
+ This module provides predefined configurations for multi-scale diffusion that balance
5
+ quality and performance based on different use cases.
6
+ """
7
+
8
+ from typing import NamedTuple, Dict, Any
9
+
10
+
11
+ class MultiscalePreset(NamedTuple):
12
+ """#### Class representing a multi-scale diffusion preset.
13
+
14
+ #### Args:
15
+ - `name` (str): The name of the preset.
16
+ - `description` (str): Description of the preset's purpose.
17
+ - `enable_multiscale` (bool): Whether multi-scale diffusion is enabled.
18
+ - `multiscale_factor` (float): Scale factor for intermediate steps (0.1-1.0).
19
+ - `multiscale_fullres_start` (int): Number of first steps at full resolution.
20
+ - `multiscale_fullres_end` (int): Number of last steps at full resolution.
21
+ - `multiscale_intermittent_fullres` (bool): Whether to use intermittent full-res.
22
+ """
23
+
24
+ name: str
25
+ description: str
26
+ enable_multiscale: bool
27
+ multiscale_factor: float
28
+ multiscale_fullres_start: int
29
+ multiscale_fullres_end: int
30
+ multiscale_intermittent_fullres: bool
31
+
32
+ @property
33
+ def as_dict(self) -> Dict[str, Any]:
34
+ """#### Convert the preset to a dictionary.
35
+
36
+ #### Returns:
37
+ - `Dict[str, Any]`: The preset parameters as a dictionary.
38
+ """
39
+ return {
40
+ "enable_multiscale": self.enable_multiscale,
41
+ "multiscale_factor": self.multiscale_factor,
42
+ "multiscale_fullres_start": self.multiscale_fullres_start,
43
+ "multiscale_fullres_end": self.multiscale_fullres_end,
44
+ "multiscale_intermittent_fullres": self.multiscale_intermittent_fullres,
45
+ }
46
+
47
+
48
+ # Predefined multi-scale diffusion presets
49
+ MULTISCALE_PRESETS = {
50
+ "quality": MultiscalePreset(
51
+ name="Quality",
52
+ description="High quality preset with intermittent full-res for best image quality",
53
+ enable_multiscale=True,
54
+ multiscale_factor=0.5,
55
+ multiscale_fullres_start=10,
56
+ multiscale_fullres_end=8,
57
+ multiscale_intermittent_fullres=True,
58
+ ),
59
+ "performance": MultiscalePreset(
60
+ name="Performance",
61
+ description="Performance-oriented preset with aggressive downscaling for maximum speed",
62
+ enable_multiscale=True,
63
+ multiscale_factor=0.25,
64
+ multiscale_fullres_start=5,
65
+ multiscale_fullres_end=8,
66
+ multiscale_intermittent_fullres=True,
67
+ ),
68
+ "balanced": MultiscalePreset(
69
+ name="Balanced",
70
+ description="Balanced preset offering good quality and performance",
71
+ enable_multiscale=True,
72
+ multiscale_factor=0.5,
73
+ multiscale_fullres_start=5,
74
+ multiscale_fullres_end=8,
75
+ multiscale_intermittent_fullres=True,
76
+ ),
77
+ "disabled": MultiscalePreset(
78
+ name="Disabled",
79
+ description="Multi-scale diffusion disabled - full resolution throughout",
80
+ enable_multiscale=False,
81
+ multiscale_factor=1.0,
82
+ multiscale_fullres_start=0,
83
+ multiscale_fullres_end=0,
84
+ multiscale_intermittent_fullres=False,
85
+ ),
86
+ }
87
+
88
+
89
+ def get_preset(preset_name: str) -> MultiscalePreset:
90
+ """#### Get a multi-scale diffusion preset by name.
91
+
92
+ #### Args:
93
+ - `preset_name` (str): The name of the preset to retrieve.
94
+
95
+ #### Returns:
96
+ - `MultiscalePreset`: The requested preset.
97
+
98
+ #### Raises:
99
+ - `KeyError`: If the preset name is not found.
100
+ """
101
+ if preset_name not in MULTISCALE_PRESETS:
102
+ available_presets = ", ".join(MULTISCALE_PRESETS.keys())
103
+ raise KeyError(
104
+ f"Preset '{preset_name}' not found. Available presets: {available_presets}"
105
+ )
106
+
107
+ return MULTISCALE_PRESETS[preset_name]
108
+
109
+
110
+ def get_preset_parameters(preset_name: str) -> Dict[str, Any]:
111
+ """#### Get multi-scale diffusion parameters for a preset.
112
+
113
+ #### Args:
114
+ - `preset_name` (str): The name of the preset.
115
+
116
+ #### Returns:
117
+ - `Dict[str, Any]`: The preset parameters.
118
+ """
119
+ return get_preset(preset_name).as_dict
120
+
121
+
122
+ def list_presets() -> Dict[str, str]:
123
+ """#### List all available multi-scale diffusion presets.
124
+
125
+ #### Returns:
126
+ - `Dict[str, str]`: Dictionary mapping preset names to descriptions.
127
+ """
128
+ return {name: preset.description for name, preset in MULTISCALE_PRESETS.items()}
129
+
130
+
131
+ def apply_preset_to_kwargs(preset_name: str, kwargs: Dict[str, Any]) -> Dict[str, Any]:
132
+ """#### Apply a multi-scale preset to keyword arguments.
133
+
134
+ #### Args:
135
+ - `preset_name` (str): The name of the preset to apply.
136
+ - `kwargs` (Dict[str, Any]): Existing keyword arguments.
137
+
138
+ #### Returns:
139
+ - `Dict[str, Any]`: Updated keyword arguments with preset parameters.
140
+ """
141
+ preset_params = get_preset_parameters(preset_name)
142
+ kwargs.update(preset_params)
143
+ return kwargs
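
A minimal usage sketch (not part of this commit's code) for the preset helpers defined above, using the preset names registered in `MULTISCALE_PRESETS`:

```python
# Illustrative sketch only — uses the helpers defined in modules/sample/multiscale_presets.py.
from modules.sample.multiscale_presets import (
    list_presets,
    get_preset_parameters,
    apply_preset_to_kwargs,
)

# Mapping of preset name -> description: "quality", "performance", "balanced", "disabled".
print(list_presets())

# Fetch the raw parameter dict for one preset...
params = get_preset_parameters("balanced")

# ...or merge a preset into existing sampler keyword arguments.
sampler_kwargs = apply_preset_to_kwargs("performance", {"s_noise": 1.0})
```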
modules/sample/samplers.py CHANGED
@@ -21,6 +21,12 @@ def sample_euler_ancestral(
21
  s_noise=1.0,
22
  noise_sampler=None,
23
  pipeline=False,
24
  ):
25
  # Pre-calculate common values
26
  device = x.device
@@ -31,6 +37,90 @@ def sample_euler_ancestral(
31
  from modules.AutoEncoders import taesd
32
  from modules.user import app_instance
33
 
34
  # Pre-allocate tensors and init noise sampler
35
  s_in = torch.ones((x.shape[0],), device=device)
36
  noise_sampler = (
@@ -50,8 +140,24 @@ def sample_euler_ancestral(
50
  if not pipeline:
51
  app_instance.app.progress.set(i / (len(sigmas) - 1))
52
 
53
- # Combined model inference and step calculation
54
- denoised = model(x, sigmas[i] * s_in, **(extra_args or {}))
55
  sigma_down, sigma_up = sampling_util.get_ancestral_step(
56
  sigmas[i], sigmas[i + 1], eta=eta
57
  )
@@ -83,6 +189,12 @@ def sample_euler(
83
  s_tmax=float("inf"),
84
  s_noise=1.0,
85
  pipeline=False,
86
  ):
87
  # Pre-calculate common values
88
  device = x.device
@@ -93,6 +205,90 @@ def sample_euler(
93
  from modules.AutoEncoders import taesd
94
  from modules.user import app_instance
95
 
96
  # Pre-allocate tensors and cache parameters
97
  s_in = torch.ones((x.shape[0],), device=device)
98
  gamma_max = min(s_churn / (len(sigmas) - 1), 2**0.5 - 1) if s_churn > 0 else 0
@@ -108,6 +304,9 @@ def sample_euler(
108
  if not pipeline:
109
  app_instance.app.progress.set(i / (len(sigmas) - 1))
110
 
111
  # Combined sigma calculation and update
112
  sigma_hat = (
113
  sigmas[i] * (1 + (gamma_max if s_tmin <= sigmas[i] <= s_tmax else 0))
@@ -121,8 +320,21 @@ def sample_euler(
121
  + torch.randn_like(x) * s_noise * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5
122
  )
123
 
124
- # Fused model inference and update step
125
- denoised = model(x, sigma_hat * s_in, **(extra_args or {}))
126
  x = x + util.to_d(x, sigma_hat, denoised) * (sigmas[i + 1] - sigma_hat)
127
 
128
  if callback is not None:
@@ -588,6 +800,12 @@ def sample_dpmpp_2m_cfgpp(
588
  cfg_x0_scale=1.0,
589
  cfg_s_scale=1.0,
590
  cfg_min=1.0,
591
  ):
592
  """DPM-Solver++(2M) sampler with CFG++ optimizations"""
593
  # Pre-calculate common values and setup
@@ -599,11 +817,94 @@ def sample_dpmpp_2m_cfgpp(
599
  from modules.AutoEncoders import taesd
600
  from modules.user import app_instance
601
 
602
  # Pre-allocate tensors and transform sigmas
603
  s_in = torch.ones((x.shape[0],), device=device)
604
  t_steps = -torch.log(sigmas) # Fused calculation
605
  n_steps = len(sigmas) - 1
606
 
607
  # Pre-calculate all needed values in one go
608
  sigma_steps = torch.exp(-t_steps) # Fused calculation
609
  ratios = sigma_steps[1:] / sigma_steps[:-1]
@@ -639,15 +940,31 @@ def sample_dpmpp_2m_cfgpp(
639
  if not pipeline:
640
  app_instance.app.progress.set(i / n_steps)
641
 
642
  # Use pre-calculated CFG scale
643
  current_cfg = cfg_values[i]
644
 
645
- # Fused model inference and update calculations
646
- denoised = model(x, sigmas[i] * s_in, **extra_args)
647
  uncond_denoised = extra_args.get("model_options", {}).get(
648
  "sampler_post_cfg_function", []
649
  )[-1]({"denoised": denoised, "uncond_denoised": None})
650
 
651
  if callback is not None:
652
  callback(
653
  {
@@ -713,8 +1030,14 @@ def sample_dpmpp_sde_cfgpp(
713
  cfg_x0_scale=1.0,
714
  cfg_s_scale=1.0,
715
  cfg_min=1.0,
716
  ):
717
- """DPM-Solver++ (SDE) with CFG++ optimizations"""
718
  # Pre-calculate common values
719
  device = x.device
720
  global disable_gui
@@ -728,9 +1051,91 @@ def sample_dpmpp_sde_cfgpp(
728
  if len(sigmas) <= 1:
729
  return x
730
 
731
- # Pre-allocate tensors and values
732
- s_in = torch.ones((x.shape[0],), device=device)
733
  n_steps = len(sigmas) - 1
734
  extra_args = {} if extra_args is None else extra_args
735
 
736
  # CFG++ scheduling
@@ -751,7 +1156,7 @@ def sample_dpmpp_sde_cfgpp(
751
  x, sigmas[sigmas > 0].min(), sigmas.max(), seed=seed, cpu=True
752
  )
753
 
754
- # Track previous predictions
755
  old_denoised = None
756
  old_uncond_denoised = None
757
 
@@ -776,15 +1181,31 @@ def sample_dpmpp_sde_cfgpp(
776
  if not pipeline:
777
  app_instance.app.progress.set(i / n_steps)
778
 
779
  # Get current CFG scale
780
  current_cfg = get_cfg_scale(i)
781
 
782
- # Model inference
783
- denoised = model(x, sigmas[i] * s_in, **extra_args)
784
  uncond_denoised = extra_args.get("model_options", {}).get(
785
  "sampler_post_cfg_function", []
786
  )[-1]({"denoised": denoised, "uncond_denoised": None})
787
 
788
  if callback is not None:
789
  callback(
790
  {
@@ -815,10 +1236,10 @@ def sample_dpmpp_sde_cfgpp(
815
  uncond_denoised + (denoised - uncond_denoised) * current_cfg
816
  )
817
  else:
818
- # CFG++ with momentum
819
  x0_coeff = cfg_x0_scale * current_cfg
820
 
821
- # Calculate momentum terms
822
  h_ratio = (t - s_) / (2 * (t - t_next))
823
  momentum = (1 + h_ratio) * denoised - h_ratio * old_denoised
824
  uncond_momentum = (
@@ -828,18 +1249,35 @@ def sample_dpmpp_sde_cfgpp(
828
  # Combine with CFG++ scaling
829
  cfg_denoised = uncond_momentum + (momentum - uncond_momentum) * x0_coeff
830
 
 
 
831
  x_2 = (
832
  (sigma_fn(s_) / sigma_fn(t)) * x
833
  - (t - s_).expm1() * cfg_denoised
834
- + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su
835
  )
836
 
 
 
 
 
837
  # Step 2 inference
838
- denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args)
839
  uncond_denoised_2 = extra_args.get("model_options", {}).get(
840
  "sampler_post_cfg_function", []
841
  )[-1]({"denoised": denoised_2, "uncond_denoised": None})
842
 
843
  # Step 2 CFG++ combination
844
  if old_uncond_denoised is None:
845
  cfg_denoised_2 = (
@@ -861,11 +1299,12 @@ def sample_dpmpp_sde_cfgpp(
861
  t_next_ = t_fn(sd)
862
 
863
  # Combined update with both predictions
 
864
  x = (
865
  (sigma_fn(t_next_) / sigma_fn(t)) * x
866
  - (t - t_next_).expm1()
867
  * ((1 - 1 / (2 * r)) * cfg_denoised + (1 / (2 * r)) * cfg_denoised_2)
868
- + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su
869
  )
870
 
871
  old_denoised = denoised
 
21
  s_noise=1.0,
22
  noise_sampler=None,
23
  pipeline=False,
24
+ # Multi-scale parameters
25
+ enable_multiscale=True,
26
+ multiscale_factor=0.5,
27
+ multiscale_fullres_start=3,
28
+ multiscale_fullres_end=8,
29
+ multiscale_intermittent_fullres=False,
30
  ):
31
  # Pre-calculate common values
32
  device = x.device
 
37
  from modules.AutoEncoders import taesd
38
  from modules.user import app_instance
39
 
40
+ # Multi-scale setup with validation
41
+ original_shape = x.shape
42
+ batch_size, channels, orig_h, orig_w = original_shape
43
+
44
+ # Validate multi-scale parameters
45
+ if enable_multiscale:
46
+ if not (0.1 <= multiscale_factor <= 1.0):
47
+ print(
48
+ f"Warning: multiscale_factor {multiscale_factor} out of range [0.1, 1.0], disabling multi-scale"
49
+ )
50
+ enable_multiscale = False
51
+ if multiscale_fullres_start < 0 or multiscale_fullres_end < 0:
52
+ print("Warning: Invalid fullres step counts, disabling multi-scale")
53
+ enable_multiscale = False
54
+
55
+ # Calculate scaled dimensions (must be multiples of 8 for VAE compatibility)
56
+ scale_h = (
57
+ int(max(8, ((orig_h * multiscale_factor) // 8) * 8))
58
+ if enable_multiscale
59
+ else orig_h
60
+ )
61
+ scale_w = (
62
+ int(max(8, ((orig_w * multiscale_factor) // 8) * 8))
63
+ if enable_multiscale
64
+ else orig_w
65
+ )
66
+
67
+ # Disable multi-scale for small images or short step counts
68
+ n_steps = len(sigmas) - 1
69
+ multiscale_active = (
70
+ enable_multiscale
71
+ and orig_h > 64
72
+ and orig_w > 64
73
+ and n_steps > (multiscale_fullres_start + multiscale_fullres_end)
74
+ and (scale_h != orig_h or scale_w != orig_w)
75
+ )
76
+
77
+ if enable_multiscale and not multiscale_active:
78
+ print(
79
+ f"Multi-scale disabled: image too small ({orig_h}x{orig_w}) or insufficient steps ({n_steps})"
80
+ )
81
+ elif multiscale_active:
82
+ print(
83
+ f"Multi-scale active: {orig_h}x{orig_w} -> {scale_h}x{scale_w} (factor: {multiscale_factor})"
84
+ )
85
+
86
+ def downscale_tensor(tensor):
87
+ """Downscale tensor using bilinear interpolation"""
88
+ if not multiscale_active or tensor.shape[-2:] == (scale_h, scale_w):
89
+ return tensor
90
+ return torch.nn.functional.interpolate(
91
+ tensor, size=(scale_h, scale_w), mode="bilinear", align_corners=False
92
+ )
93
+
94
+ def upscale_tensor(tensor):
95
+ """Upscale tensor using bilinear interpolation"""
96
+ if not multiscale_active or tensor.shape[-2:] == (orig_h, orig_w):
97
+ return tensor
98
+ return torch.nn.functional.interpolate(
99
+ tensor, size=(orig_h, orig_w), mode="bilinear", align_corners=False
100
+ )
101
+
102
+ def should_use_fullres(step):
103
+ """Determine if this step should use full resolution"""
104
+ if not multiscale_active:
105
+ return True
106
+
107
+ # Always use full resolution for start and end steps
108
+ if step < multiscale_fullres_start or step >= n_steps - multiscale_fullres_end:
109
+ return True
110
+
111
+ # Intermittent full-res: every 2nd step in low-res region if enabled
112
+ if multiscale_intermittent_fullres:
113
+ # Check if we're in the low-res region
114
+ low_res_region_start = multiscale_fullres_start
115
+ low_res_region_end = n_steps - multiscale_fullres_end
116
+ if low_res_region_start <= step < low_res_region_end:
117
+ # Calculate position within low-res region
118
+ relative_step = step - low_res_region_start
119
+ # Use full-res every 2nd step (0, 2, 4, ...)
120
+ return relative_step % 2 == 0
121
+
122
+ return False
123
+
124
  # Pre-allocate tensors and init noise sampler
125
  s_in = torch.ones((x.shape[0],), device=device)
126
  noise_sampler = (
 
140
  if not pipeline:
141
  app_instance.app.progress.set(i / (len(sigmas) - 1))
142
 
143
+ # Determine resolution for this step
144
+ use_fullres = should_use_fullres(i)
145
+
146
+ # Scale input for processing
147
+ if use_fullres:
148
+ x_process = x
149
+ s_in_process = s_in
150
+ else:
151
+ x_process = downscale_tensor(x)
152
+ s_in_process = torch.ones((x_process.shape[0],), device=device)
153
+
154
+ # Model inference at appropriate resolution
155
+ denoised = model(x_process, sigmas[i] * s_in_process, **(extra_args or {}))
156
+
157
+ # Scale predictions back to original resolution if needed
158
+ if not use_fullres:
159
+ denoised = upscale_tensor(denoised)
160
+
161
  sigma_down, sigma_up = sampling_util.get_ancestral_step(
162
  sigmas[i], sigmas[i + 1], eta=eta
163
  )
 
189
  s_tmax=float("inf"),
190
  s_noise=1.0,
191
  pipeline=False,
192
+ # Multi-scale parameters
193
+ enable_multiscale=True,
194
+ multiscale_factor=0.5,
195
+ multiscale_fullres_start=3,
196
+ multiscale_fullres_end=8,
197
+ multiscale_intermittent_fullres=False,
198
  ):
199
  # Pre-calculate common values
200
  device = x.device
 
205
  from modules.AutoEncoders import taesd
206
  from modules.user import app_instance
207
 
208
+ # Multi-scale setup with validation
209
+ original_shape = x.shape
210
+ batch_size, channels, orig_h, orig_w = original_shape
211
+
212
+ # Validate multi-scale parameters
213
+ if enable_multiscale:
214
+ if not (0.1 <= multiscale_factor <= 1.0):
215
+ print(
216
+ f"Warning: multiscale_factor {multiscale_factor} out of range [0.1, 1.0], disabling multi-scale"
217
+ )
218
+ enable_multiscale = False
219
+ if multiscale_fullres_start < 0 or multiscale_fullres_end < 0:
220
+ print("Warning: Invalid fullres step counts, disabling multi-scale")
221
+ enable_multiscale = False
222
+
223
+ # Calculate scaled dimensions (must be multiples of 8 for VAE compatibility)
224
+ scale_h = (
225
+ int(max(8, ((orig_h * multiscale_factor) // 8) * 8))
226
+ if enable_multiscale
227
+ else orig_h
228
+ )
229
+ scale_w = (
230
+ int(max(8, ((orig_w * multiscale_factor) // 8) * 8))
231
+ if enable_multiscale
232
+ else orig_w
233
+ )
234
+
235
+ # Disable multi-scale for small images or short step counts
236
+ n_steps = len(sigmas) - 1
237
+ multiscale_active = (
238
+ enable_multiscale
239
+ and orig_h > 64
240
+ and orig_w > 64
241
+ and n_steps > (multiscale_fullres_start + multiscale_fullres_end)
242
+ and (scale_h != orig_h or scale_w != orig_w)
243
+ )
244
+
245
+ if enable_multiscale and not multiscale_active:
246
+ print(
247
+ f"Multi-scale disabled: image too small ({orig_h}x{orig_w}) or insufficient steps ({n_steps})"
248
+ )
249
+ elif multiscale_active:
250
+ print(
251
+ f"Multi-scale active: {orig_h}x{orig_w} -> {scale_h}x{scale_w} (factor: {multiscale_factor})"
252
+ )
253
+
254
+ def downscale_tensor(tensor):
255
+ """Downscale tensor using bilinear interpolation"""
256
+ if not multiscale_active or tensor.shape[-2:] == (scale_h, scale_w):
257
+ return tensor
258
+ return torch.nn.functional.interpolate(
259
+ tensor, size=(scale_h, scale_w), mode="bilinear", align_corners=False
260
+ )
261
+
262
+ def upscale_tensor(tensor):
263
+ """Upscale tensor using bilinear interpolation"""
264
+ if not multiscale_active or tensor.shape[-2:] == (orig_h, orig_w):
265
+ return tensor
266
+ return torch.nn.functional.interpolate(
267
+ tensor, size=(orig_h, orig_w), mode="bilinear", align_corners=False
268
+ )
269
+
270
+ def should_use_fullres(step):
271
+ """Determine if this step should use full resolution"""
272
+ if not multiscale_active:
273
+ return True
274
+
275
+ # Always use full resolution for start and end steps
276
+ if step < multiscale_fullres_start or step >= n_steps - multiscale_fullres_end:
277
+ return True
278
+
279
+ # Intermittent full-res: every 2nd step in low-res region if enabled
280
+ if multiscale_intermittent_fullres:
281
+ # Check if we're in the low-res region
282
+ low_res_region_start = multiscale_fullres_start
283
+ low_res_region_end = n_steps - multiscale_fullres_end
284
+ if low_res_region_start <= step < low_res_region_end:
285
+ # Calculate position within low-res region
286
+ relative_step = step - low_res_region_start
287
+ # Use full-res every 2nd step (0, 2, 4, ...)
288
+ return relative_step % 2 == 0
289
+
290
+ return False
291
+
292
  # Pre-allocate tensors and cache parameters
293
  s_in = torch.ones((x.shape[0],), device=device)
294
  gamma_max = min(s_churn / (len(sigmas) - 1), 2**0.5 - 1) if s_churn > 0 else 0
 
304
  if not pipeline:
305
  app_instance.app.progress.set(i / (len(sigmas) - 1))
306
 
307
+ # Determine resolution for this step
308
+ use_fullres = should_use_fullres(i)
309
+
310
  # Combined sigma calculation and update
311
  sigma_hat = (
312
  sigmas[i] * (1 + (gamma_max if s_tmin <= sigmas[i] <= s_tmax else 0))
 
320
  + torch.randn_like(x) * s_noise * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5
321
  )
322
 
323
+ # Scale input for processing
324
+ if use_fullres:
325
+ x_process = x
326
+ s_in_process = s_in
327
+ else:
328
+ x_process = downscale_tensor(x)
329
+ s_in_process = torch.ones((x_process.shape[0],), device=device)
330
+
331
+ # Model inference at appropriate resolution
332
+ denoised = model(x_process, sigma_hat * s_in_process, **(extra_args or {}))
333
+
334
+ # Scale predictions back to original resolution if needed
335
+ if not use_fullres:
336
+ denoised = upscale_tensor(denoised)
337
+
338
  x = x + util.to_d(x, sigma_hat, denoised) * (sigmas[i + 1] - sigma_hat)
339
 
340
  if callback is not None:
 
800
  cfg_x0_scale=1.0,
801
  cfg_s_scale=1.0,
802
  cfg_min=1.0,
803
+ # Multi-scale parameters
804
+ enable_multiscale=True,
805
+ multiscale_factor=0.5,
806
+ multiscale_fullres_start=5,
807
+ multiscale_fullres_end=8,
808
+ multiscale_intermittent_fullres=True,
809
  ):
810
  """DPM-Solver++(2M) sampler with CFG++ optimizations"""
811
  # Pre-calculate common values and setup
 
817
  from modules.AutoEncoders import taesd
818
  from modules.user import app_instance
819
 
820
+ # Multi-scale setup with validation
821
+ original_shape = x.shape
822
+ batch_size, channels, orig_h, orig_w = original_shape
823
+
824
+ # Validate multi-scale parameters
825
+ if enable_multiscale:
826
+ if not (0.1 <= multiscale_factor <= 1.0):
827
+ print(
828
+ f"Warning: multiscale_factor {multiscale_factor} out of range [0.1, 1.0], disabling multi-scale"
829
+ )
830
+ enable_multiscale = False
831
+ if multiscale_fullres_start < 0 or multiscale_fullres_end < 0:
832
+ print("Warning: Invalid fullres step counts, disabling multi-scale")
833
+ enable_multiscale = False
834
+
835
+ # Calculate scaled dimensions (must be multiples of 8 for VAE compatibility)
836
+ scale_h = (
837
+ int(max(8, ((orig_h * multiscale_factor) // 8) * 8))
838
+ if enable_multiscale
839
+ else orig_h
840
+ )
841
+ scale_w = (
842
+ int(max(8, ((orig_w * multiscale_factor) // 8) * 8))
843
+ if enable_multiscale
844
+ else orig_w
845
+ )
846
+
847
  # Pre-allocate tensors and transform sigmas
848
  s_in = torch.ones((x.shape[0],), device=device)
849
  t_steps = -torch.log(sigmas) # Fused calculation
850
  n_steps = len(sigmas) - 1
851
 
852
+ # Disable multi-scale for small images or short step counts
853
+ multiscale_active = (
854
+ enable_multiscale
855
+ and orig_h > 64
856
+ and orig_w > 64
857
+ and n_steps > (multiscale_fullres_start + multiscale_fullres_end)
858
+ and (scale_h != orig_h or scale_w != orig_w)
859
+ )
860
+
861
+ if enable_multiscale and not multiscale_active:
862
+ print(
863
+ f"Multi-scale disabled: image too small ({orig_h}x{orig_w}) or insufficient steps ({n_steps})"
864
+ )
865
+ elif multiscale_active:
866
+ print(
867
+ f"Multi-scale active: {orig_h}x{orig_w} -> {scale_h}x{scale_w} (factor: {multiscale_factor})"
868
+ )
869
+
870
+ def downscale_tensor(tensor):
871
+ """Downscale tensor using bilinear interpolation"""
872
+ if not multiscale_active or tensor.shape[-2:] == (scale_h, scale_w):
873
+ return tensor
874
+ return torch.nn.functional.interpolate(
875
+ tensor, size=(scale_h, scale_w), mode="bilinear", align_corners=False
876
+ )
877
+
878
+ def upscale_tensor(tensor):
879
+ """Upscale tensor using bilinear interpolation"""
880
+ if not multiscale_active or tensor.shape[-2:] == (orig_h, orig_w):
881
+ return tensor
882
+ return torch.nn.functional.interpolate(
883
+ tensor, size=(orig_h, orig_w), mode="bilinear", align_corners=False
884
+ )
885
+
886
+ def should_use_fullres(step):
887
+ """Determine if this step should use full resolution"""
888
+ if not multiscale_active:
889
+ return True
890
+
891
+ # Always use full resolution for start and end steps
892
+ if step < multiscale_fullres_start or step >= n_steps - multiscale_fullres_end:
893
+ return True
894
+
895
+ # Intermittent full-res: every 2nd step in low-res region if enabled
896
+ if multiscale_intermittent_fullres:
897
+ # Check if we're in the low-res region
898
+ low_res_region_start = multiscale_fullres_start
899
+ low_res_region_end = n_steps - multiscale_fullres_end
900
+ if low_res_region_start <= step < low_res_region_end:
901
+ # Calculate position within low-res region
902
+ relative_step = step - low_res_region_start
903
+ # Use full-res every 2nd step (0, 2, 4, ...)
904
+ return relative_step % 2 == 0
905
+
906
+ return False
907
+
908
  # Pre-calculate all needed values in one go
909
  sigma_steps = torch.exp(-t_steps) # Fused calculation
910
  ratios = sigma_steps[1:] / sigma_steps[:-1]
 
940
  if not pipeline:
941
  app_instance.app.progress.set(i / n_steps)
942
 
943
+ # Determine resolution for this step
944
+ use_fullres = should_use_fullres(i)
945
+
946
+ # Scale input for processing
947
+ if use_fullres:
948
+ x_process = x
949
+ s_in_process = s_in
950
+ else:
951
+ x_process = downscale_tensor(x)
952
+ s_in_process = torch.ones((x_process.shape[0],), device=device)
953
+
954
  # Use pre-calculated CFG scale
955
  current_cfg = cfg_values[i]
956
 
957
+ # Model inference at appropriate resolution
958
+ denoised = model(x_process, sigmas[i] * s_in_process, **extra_args)
959
  uncond_denoised = extra_args.get("model_options", {}).get(
960
  "sampler_post_cfg_function", []
961
  )[-1]({"denoised": denoised, "uncond_denoised": None})
962
 
963
+ # Scale predictions back to original resolution if needed
964
+ if not use_fullres:
965
+ denoised = upscale_tensor(denoised)
966
+ uncond_denoised = upscale_tensor(uncond_denoised)
967
+
968
  if callback is not None:
969
  callback(
970
  {
 
1030
  cfg_x0_scale=1.0,
1031
  cfg_s_scale=1.0,
1032
  cfg_min=1.0,
1033
+ # Multi-scale parameters
1034
+ enable_multiscale=True,
1035
+ multiscale_factor=0.5,
1036
+ multiscale_fullres_start=5,
1037
+ multiscale_fullres_end=8,
1038
+ multiscale_intermittent_fullres=False,
1039
  ):
1040
+ """DPM-Solver++ (SDE) with CFG++ optimizations and multi-scale diffusion"""
1041
  # Pre-calculate common values
1042
  device = x.device
1043
  global disable_gui
 
1051
  if len(sigmas) <= 1:
1052
  return x
1053
 
1054
+ # Multi-scale setup with validation
1055
+ original_shape = x.shape
1056
+ batch_size, channels, orig_h, orig_w = original_shape
1057
+
1058
+ # Validate multi-scale parameters
1059
+ if enable_multiscale:
1060
+ if not (0.1 <= multiscale_factor <= 1.0):
1061
+ print(
1062
+ f"Warning: multiscale_factor {multiscale_factor} out of range [0.1, 1.0], disabling multi-scale"
1063
+ )
1064
+ enable_multiscale = False
1065
+ if multiscale_fullres_start < 0 or multiscale_fullres_end < 0:
1066
+ print("Warning: Invalid fullres step counts, disabling multi-scale")
1067
+ enable_multiscale = False
1068
+
1069
+ # Calculate scaled dimensions (must be multiples of 8 for VAE compatibility)
1070
+ scale_h = (
1071
+ int(max(8, ((orig_h * multiscale_factor) // 8) * 8))
1072
+ if enable_multiscale
1073
+ else orig_h
1074
+ )
1075
+ scale_w = (
1076
+ int(max(8, ((orig_w * multiscale_factor) // 8) * 8))
1077
+ if enable_multiscale
1078
+ else orig_w
1079
+ )
1080
+
1081
+ # Disable multi-scale for small images or short step counts
1082
  n_steps = len(sigmas) - 1
1083
+ multiscale_active = (
1084
+ enable_multiscale
1085
+ and orig_h > 64
1086
+ and orig_w > 64
1087
+ and n_steps > (multiscale_fullres_start + multiscale_fullres_end)
1088
+ and (scale_h != orig_h or scale_w != orig_w)
1089
+ )
1090
+
1091
+ if enable_multiscale and not multiscale_active:
1092
+ print(
1093
+ f"Multi-scale disabled: image too small ({orig_h}x{orig_w}) or insufficient steps ({n_steps})"
1094
+ )
1095
+ elif multiscale_active:
1096
+ print(
1097
+ f"Multi-scale active: {orig_h}x{orig_w} -> {scale_h}x{scale_w} (factor: {multiscale_factor})"
1098
+ )
1099
+
1100
+ def downscale_tensor(tensor):
1101
+ """Downscale tensor using bilinear interpolation"""
1102
+ if not multiscale_active or tensor.shape[-2:] == (scale_h, scale_w):
1103
+ return tensor
1104
+ return torch.nn.functional.interpolate(
1105
+ tensor, size=(scale_h, scale_w), mode="bilinear", align_corners=False
1106
+ )
1107
+
1108
+ def upscale_tensor(tensor):
1109
+ """Upscale tensor using bilinear interpolation"""
1110
+ if not multiscale_active or tensor.shape[-2:] == (orig_h, orig_w):
1111
+ return tensor
1112
+ return torch.nn.functional.interpolate(
1113
+ tensor, size=(orig_h, orig_w), mode="bilinear", align_corners=False
1114
+ )
1115
+
1116
+ def should_use_fullres(step):
1117
+ """Determine if this step should use full resolution"""
1118
+ if not multiscale_active:
1119
+ return True
1120
+
1121
+ # Always use full resolution for start and end steps
1122
+ if step < multiscale_fullres_start or step >= n_steps - multiscale_fullres_end:
1123
+ return True
1124
+
1125
+ # Intermittent full-res: every 2nd step in low-res region if enabled
1126
+ if multiscale_intermittent_fullres:
1127
+ # Check if we're in the low-res region
1128
+ low_res_region_start = multiscale_fullres_start
1129
+ low_res_region_end = n_steps - multiscale_fullres_end
1130
+ if low_res_region_start <= step < low_res_region_end:
1131
+ # Calculate position within low-res region
1132
+ relative_step = step - low_res_region_start
1133
+ # Use full-res every 2nd step (0, 2, 4, ...)
1134
+ return relative_step % 2 == 0
1135
+
1136
+ return False
1137
+
1138
+ # Pre-allocate tensors and values
+ s_in = torch.ones((x.shape[0],), device=device)
1139
  extra_args = {} if extra_args is None else extra_args
1140
 
1141
  # CFG++ scheduling
 
1156
  x, sigmas[sigmas > 0].min(), sigmas.max(), seed=seed, cpu=True
1157
  )
1158
 
1159
+ # Track previous predictions for momentum (stored at original resolution)
1160
  old_denoised = None
1161
  old_uncond_denoised = None
1162
 
 
1181
  if not pipeline:
1182
  app_instance.app.progress.set(i / n_steps)
1183
 
1184
+ # Determine resolution for this step
1185
+ use_fullres = should_use_fullres(i)
1186
+
1187
+ # Scale input for processing
1188
+ if use_fullres:
1189
+ x_process = x
1190
+ s_in_process = s_in
1191
+ else:
1192
+ x_process = downscale_tensor(x)
1193
+ s_in_process = torch.ones((x_process.shape[0],), device=device)
1194
+
1195
  # Get current CFG scale
1196
  current_cfg = get_cfg_scale(i)
1197
 
1198
+ # Model inference at appropriate resolution
1199
+ denoised = model(x_process, sigmas[i] * s_in_process, **extra_args)
1200
  uncond_denoised = extra_args.get("model_options", {}).get(
1201
  "sampler_post_cfg_function", []
1202
  )[-1]({"denoised": denoised, "uncond_denoised": None})
1203
 
1204
+ # Scale predictions back to original resolution if needed
1205
+ if not use_fullres:
1206
+ denoised = upscale_tensor(denoised)
1207
+ uncond_denoised = upscale_tensor(uncond_denoised)
1208
+
1209
  if callback is not None:
1210
  callback(
1211
  {
 
1236
  uncond_denoised + (denoised - uncond_denoised) * current_cfg
1237
  )
1238
  else:
1239
+ # CFG++ with momentum (using properly scaled momentum terms)
1240
  x0_coeff = cfg_x0_scale * current_cfg
1241
 
1242
+ # Calculate momentum terms at original resolution
1243
  h_ratio = (t - s_) / (2 * (t - t_next))
1244
  momentum = (1 + h_ratio) * denoised - h_ratio * old_denoised
1245
  uncond_momentum = (
 
1249
  # Combine with CFG++ scaling
1250
  cfg_denoised = uncond_momentum + (momentum - uncond_momentum) * x0_coeff
1251
 
1252
+ # Calculate x_2 for step 2
1253
+ noise_step1 = noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su
1254
  x_2 = (
1255
  (sigma_fn(s_) / sigma_fn(t)) * x
1256
  - (t - s_).expm1() * cfg_denoised
1257
+ + noise_step1
1258
  )
1259
 
1260
+ # Step 2 inference - determine resolution
1261
+ use_fullres_step2 = should_use_fullres(i) if multiscale_active else True
1262
+
1263
+ if use_fullres_step2:
1264
+ x_2_process = x_2
1265
+ s_in_process_2 = s_in
1266
+ else:
1267
+ x_2_process = downscale_tensor(x_2)
1268
+ s_in_process_2 = torch.ones((x_2_process.shape[0],), device=device)
1269
+
1270
  # Step 2 inference
1271
+ denoised_2 = model(x_2_process, sigma_fn(s) * s_in_process_2, **extra_args)
1272
  uncond_denoised_2 = extra_args.get("model_options", {}).get(
1273
  "sampler_post_cfg_function", []
1274
  )[-1]({"denoised": denoised_2, "uncond_denoised": None})
1275
 
1276
+ # Scale step 2 predictions back if needed
1277
+ if not use_fullres_step2:
1278
+ denoised_2 = upscale_tensor(denoised_2)
1279
+ uncond_denoised_2 = upscale_tensor(uncond_denoised_2)
1280
+
1281
  # Step 2 CFG++ combination
1282
  if old_uncond_denoised is None:
1283
  cfg_denoised_2 = (
 
1299
  t_next_ = t_fn(sd)
1300
 
1301
  # Combined update with both predictions
1302
+ noise_final = noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su
1303
  x = (
1304
  (sigma_fn(t_next_) / sigma_fn(t)) * x
1305
  - (t - t_next_).expm1()
1306
  * ((1 - 1 / (2 * r)) * cfg_denoised + (1 / (2 * r)) * cfg_denoised_2)
1307
+ + noise_final
1308
  )
1309
 
1310
  old_denoised = denoised
modules/sample/sampling.py CHANGED
@@ -788,6 +788,12 @@ class KSampler:
788
  disable_noise: bool = False,
789
  pipeline: bool = False,
790
  flux: bool = False,
 
 
 
 
 
 
791
  ) -> tuple:
792
  """Unified sampling interface that works both as direct sampling and through the common_ksampler.
793
 
@@ -870,6 +876,11 @@ class KSampler:
870
  force_full_denoise,
871
  pipeline or self.pipeline,
872
  flux,
 
 
 
 
 
873
  )
874
 
875
 
@@ -896,6 +907,12 @@ def sample1(
896
  seed: int = None,
897
  pipeline: bool = False,
898
  flux: bool = False,
 
 
 
 
 
 
899
  ) -> torch.Tensor:
900
  """Sample using the given parameters with the unified KSampler.
901
 
@@ -925,32 +942,102 @@ def sample1(
925
  Returns:
926
  torch.Tensor: The sampled tensor.
927
  """
928
- sampler = KSampler(
929
- model=model,
930
- steps=steps,
931
- sampler=sampler_name,
932
- scheduler=scheduler,
933
- denoise=denoise,
934
- model_options=model.model_options,
935
- pipeline=pipeline,
936
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
937
 
938
- samples = sampler.direct_sample(
939
- noise,
940
- positive,
941
- negative,
942
- cfg=cfg,
943
- latent_image=latent_image,
944
- start_step=start_step,
945
- last_step=last_step,
946
- force_full_denoise=force_full_denoise,
947
- denoise_mask=noise_mask,
948
- sigmas=sigmas,
949
- callback=callback,
950
- disable_pbar=disable_pbar,
951
- seed=seed,
952
- flux=flux,
953
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
954
  samples = samples.to(Device.intermediate_device())
955
  return samples
956
 
@@ -1066,6 +1153,12 @@ def common_ksampler(
1066
  force_full_denoise: bool = False,
1067
  pipeline: bool = False,
1068
  flux: bool = False,
 
 
 
 
 
 
1069
  ) -> tuple:
1070
  """Common ksampler function.
1071
 
@@ -1126,6 +1219,11 @@ def common_ksampler(
1126
  seed=seed,
1127
  pipeline=pipeline,
1128
  flux=flux,
 
 
 
 
 
1129
  )
1130
  out = latent.copy()
1131
  out["samples"] = samples
 
788
  disable_noise: bool = False,
789
  pipeline: bool = False,
790
  flux: bool = False,
791
+ # Multi-scale diffusion parameters
792
+ enable_multiscale: bool = True,
793
+ multiscale_factor: float = 0.5,
794
+ multiscale_fullres_start: int = 3,
795
+ multiscale_fullres_end: int = 8,
796
+ multiscale_intermittent_fullres: bool = False,
797
  ) -> tuple:
798
  """Unified sampling interface that works both as direct sampling and through the common_ksampler.
799
 
 
876
  force_full_denoise,
877
  pipeline or self.pipeline,
878
  flux,
879
+ enable_multiscale,
880
+ multiscale_factor,
881
+ multiscale_fullres_start,
882
+ multiscale_fullres_end,
883
+ multiscale_intermittent_fullres,
884
  )
885
 
886
 
 
907
  seed: int = None,
908
  pipeline: bool = False,
909
  flux: bool = False,
910
+ # Multi-scale diffusion parameters
911
+ enable_multiscale: bool = True,
912
+ multiscale_factor: float = 0.5,
913
+ multiscale_fullres_start: int = 3,
914
+ multiscale_fullres_end: int = 8,
915
+ multiscale_intermittent_fullres: bool = False,
916
  ) -> torch.Tensor:
917
  """Sample using the given parameters with the unified KSampler.
918
 
 
942
  Returns:
943
  torch.Tensor: The sampled tensor.
944
  """
945
+ # Create extra options for multi-scale diffusion (for supported samplers)
946
+ multiscale_supported_samplers = [
947
+ "dpmpp_sde_cfgpp",
948
+ "sample_euler_ancestral",
949
+ "sample_euler",
950
+ "sample_dpmpp_2m_cfgpp",
951
+ ]
952
+
953
+ extra_options = {}
954
+ if sampler_name in multiscale_supported_samplers:
955
+ extra_options = {
956
+ "enable_multiscale": enable_multiscale,
957
+ "multiscale_factor": multiscale_factor,
958
+ "multiscale_fullres_start": multiscale_fullres_start,
959
+ "multiscale_fullres_end": multiscale_fullres_end,
960
+ "multiscale_intermittent_fullres": multiscale_intermittent_fullres,
961
+ }
962
+
963
+ # Use custom sampler with extra options for supported samplers
964
+ if sampler_name in multiscale_supported_samplers and extra_options:
965
+ # Create a custom sampler with multi-scale options
966
+ sampler_obj = ksampler(
967
+ sampler_name, pipeline=pipeline, extra_options=extra_options
968
+ )
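+ # Assumption: the ksampler factory forwards extra_options as keyword
+ # arguments to the underlying sampler function, which is how the
+ # multi-scale parameters reach the supported samplers listed above.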
969
+ sigmas = ksampler_util.calculate_sigmas(
970
+ model.get_model_object("model_sampling"), scheduler, steps
971
+ )
972
+ if denoise is None or denoise > 0.9999:
973
+ pass # Use full sigmas
974
+ else:
975
+ if denoise <= 0.0:
976
+ sigmas = torch.FloatTensor([])
977
+ else:
978
+ new_steps = int(steps / denoise)
979
+ sigmas_full = ksampler_util.calculate_sigmas(
980
+ model.get_model_object("model_sampling"), scheduler, new_steps
981
+ )
982
+ sigmas = sigmas_full[-(steps + 1) :]
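+ # Illustrative example: steps=20 with denoise=0.5 recomputes the schedule
+ # for 40 steps and keeps only the last 21 sigmas, i.e. the low-noise part.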
983
 
984
+ # Process sigmas for start/end steps
985
+ if last_step is not None and last_step < (len(sigmas) - 1):
986
+ sigmas = sigmas[: last_step + 1]
987
+ if force_full_denoise:
988
+ sigmas[-1] = 0
989
+ if start_step is not None and start_step < (len(sigmas) - 1):
990
+ sigmas = sigmas[start_step:]
991
+
992
+ sigmas = sigmas.to(model.load_device)
993
+
994
+ # Use the custom sample function directly
995
+ samples = sample(
996
+ model,
997
+ noise,
998
+ positive,
999
+ negative,
1000
+ cfg,
1001
+ model.load_device,
1002
+ sampler_obj,
1003
+ sigmas,
1004
+ model.model_options,
1005
+ latent_image=latent_image,
1006
+ denoise_mask=noise_mask,
1007
+ callback=callback,
1008
+ disable_pbar=disable_pbar,
1009
+ seed=seed,
1010
+ pipeline=pipeline,
1011
+ flux=flux,
1012
+ )
1013
+ else:
1014
+ # Use the standard KSampler for other samplers
1015
+ sampler = KSampler(
1016
+ model=model,
1017
+ steps=steps,
1018
+ sampler=sampler_name,
1019
+ scheduler=scheduler,
1020
+ denoise=denoise,
1021
+ model_options=model.model_options,
1022
+ pipeline=pipeline,
1023
+ )
1024
+
1025
+ samples = sampler.direct_sample(
1026
+ noise,
1027
+ positive,
1028
+ negative,
1029
+ cfg=cfg,
1030
+ latent_image=latent_image,
1031
+ start_step=start_step,
1032
+ last_step=last_step,
1033
+ force_full_denoise=force_full_denoise,
1034
+ denoise_mask=noise_mask,
1035
+ sigmas=sigmas,
1036
+ callback=callback,
1037
+ disable_pbar=disable_pbar,
1038
+ seed=seed,
1039
+ flux=flux,
1040
+ )
1041
  samples = samples.to(Device.intermediate_device())
1042
  return samples
1043
 
 
1153
  force_full_denoise: bool = False,
1154
  pipeline: bool = False,
1155
  flux: bool = False,
1156
+ # Multi-scale diffusion parameters
1157
+ enable_multiscale: bool = True,
1158
+ multiscale_factor: float = 0.5,
1159
+ multiscale_fullres_start: int = 3,
1160
+ multiscale_fullres_end: int = 8,
1161
+ multiscale_intermittent_fullres: bool = False,
1162
  ) -> tuple:
1163
  """Common ksampler function.
1164
 
 
1219
  seed=seed,
1220
  pipeline=pipeline,
1221
  flux=flux,
1222
+ enable_multiscale=enable_multiscale,
1223
+ multiscale_factor=multiscale_factor,
1224
+ multiscale_fullres_start=multiscale_fullres_start,
1225
+ multiscale_fullres_end=multiscale_fullres_end,
1226
+ multiscale_intermittent_fullres=multiscale_intermittent_fullres,
1227
  )
1228
  out = latent.copy()
1229
  out["samples"] = samples
modules/user/GUI.py CHANGED
@@ -32,6 +32,7 @@ from modules.Quantize import Quantizer
32
  from modules.WaveSpeed import fbcache_nodes
33
  from modules.hidiffusion import msw_msa_attention
34
  from modules.AutoHDR import ahdr
 
35
 
36
  Downloader.CheckAndDownload()
37
 
@@ -64,7 +65,10 @@ class App(tk.Tk):
64
  """Initialize the App class."""
65
  super().__init__()
66
  self.title("LightDiffusion")
67
- self.geometry("900x750")
 
 
 
68
 
69
  # Configure main window grid
70
  self.grid_columnconfigure(1, weight=1)
@@ -238,6 +242,9 @@ class App(tk.Tk):
238
  self.checkbox_frame.grid_rowconfigure(0, weight=1)
239
  self.checkbox_frame.grid_rowconfigure(1, weight=1)
240
  self.checkbox_frame.grid_rowconfigure(2, weight=1)
 
 
 
241
 
242
  # checkbox for hiresfix
243
  self.hires_fix_var = tk.BooleanVar()
@@ -296,6 +303,43 @@ class App(tk.Tk):
296
  row=2, column=0, padx=(75, 5), pady=5, sticky="nsew"
297
  )
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  # Button to launch the generation
300
  self.generate_button = ctk.CTkButton(
301
  self.sidebar,
@@ -432,6 +476,32 @@ class App(tk.Tk):
432
  else "dpmpp_2m_cfgpp"
433
  )
434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  def _img2img(self, file_path: str) -> None:
436
  """Perform img2img on the selected image.
437
 
@@ -516,7 +586,7 @@ class App(tk.Tk):
516
  pass
517
  ultimatesdupscale_250 = ultimatesdupscale.upscale(
518
  upscale_by=2,
519
- seed=random.randint(1, 2**64),
520
  steps=8,
521
  cfg=6,
522
  sampler_name=self.sampler,
@@ -576,6 +646,47 @@ class App(tk.Tk):
576
  else:
577
  print("Adetailer is OFF")
578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  def print_previewer(self) -> None:
580
  """Print the status of the previewer checkbox."""
581
  if self.previewer_var.get() is True:
@@ -754,8 +865,12 @@ class App(tk.Tk):
754
  emptylatentimage_244 = emptylatentimage.generate(
755
  width=w, height=h, batch_size=int(self.batch_slider.get())
756
  )
 
 
 
 
757
  ksampler_239 = ksampler_instance.sample(
758
- seed=random.randint(1, 2**64),
759
  steps=20,
760
  cfg=cfg,
761
  sampler_name=self.sampler,
@@ -767,6 +882,7 @@ class App(tk.Tk):
767
  positive=cliptextencode_242[0],
768
  negative=cliptextencode_243[0],
769
  latent_image=emptylatentimage_244[0],
 
770
  )
771
  self.progress.set(0.4)
772
  if self.hires_fix_var.get() is True:
@@ -776,7 +892,7 @@ class App(tk.Tk):
776
  samples=ksampler_239[0],
777
  )
778
  ksampler_253 = ksampler_instance.sample(
779
- seed=random.randint(1, 2**64),
780
  steps=10,
781
  cfg=8,
782
  sampler_name="euler_ancestral_cfgpp",
@@ -788,6 +904,7 @@ class App(tk.Tk):
788
  positive=cliptextencode_242[0],
789
  negative=cliptextencode_243[0],
790
  latent_image=latentupscale_254[0],
 
791
  )
792
  vaedecode_240 = vaedecode.decode(
793
  samples=ksampler_253[0],
@@ -839,7 +956,7 @@ class App(tk.Tk):
839
  guide_size=512,
840
  guide_size_for=False,
841
  max_size=768,
842
- seed=random.randint(1, 2**64),
843
  steps=20,
844
  cfg=6.5,
845
  sampler_name=self.sampler,
@@ -893,7 +1010,7 @@ class App(tk.Tk):
893
  guide_size=512,
894
  guide_size_for=False,
895
  max_size=768,
896
- seed=random.randint(1, 2**64),
897
  steps=20,
898
  cfg=6.5,
899
  sampler_name=self.sampler,
@@ -994,7 +1111,7 @@ class App(tk.Tk):
994
  # except ImportError:
995
  # print("Triton not found, skipping compilation")
996
  ksampler_3 = ksampler.sample(
997
- seed=random.randint(1, 2**64),
998
  steps=20,
999
  cfg=1,
1000
  sampler_name="euler_cfgpp",
 
32
  from modules.WaveSpeed import fbcache_nodes
33
  from modules.hidiffusion import msw_msa_attention
34
  from modules.AutoHDR import ahdr
35
+ from modules.sample.multiscale_presets import MULTISCALE_PRESETS, get_preset_parameters
36
 
37
  Downloader.CheckAndDownload()
38
 
 
65
  """Initialize the App class."""
66
  super().__init__()
67
  self.title("LightDiffusion")
68
+ self.geometry("900x850")
69
+
70
+ # Initialize last seed
71
+ self.last_seed = self._get_last_seed()
72
 
73
  # Configure main window grid
74
  self.grid_columnconfigure(1, weight=1)
 
242
  self.checkbox_frame.grid_rowconfigure(0, weight=1)
243
  self.checkbox_frame.grid_rowconfigure(1, weight=1)
244
  self.checkbox_frame.grid_rowconfigure(2, weight=1)
245
+ self.checkbox_frame.grid_rowconfigure(3, weight=1)
246
+ self.checkbox_frame.grid_rowconfigure(4, weight=1)
247
+ self.checkbox_frame.grid_rowconfigure(5, weight=1)
248
 
249
  # checkbox for hiresfix
250
  self.hires_fix_var = tk.BooleanVar()
 
303
  row=2, column=0, padx=(75, 5), pady=5, sticky="nsew"
304
  )
305
 
306
+ # checkbox to enable multi-scale diffusion
307
+ self.multiscale_var = tk.BooleanVar(value=True)
308
+ self.multiscale_checkbox = ctk.CTkCheckBox(
309
+ self.checkbox_frame,
310
+ text="Multi-Scale",
311
+ variable=self.multiscale_var,
312
+ text_color="black",
313
+ )
314
+ self.multiscale_checkbox.grid(row=2, column=1, padx=5, pady=5, sticky="nsew")
315
+
316
+ # checkbox to enable reuse last seed
317
+ self.reuse_seed_var = tk.BooleanVar()
318
+ self.reuse_seed_checkbox = ctk.CTkCheckBox(
319
+ self.checkbox_frame,
320
+ text="Reuse Last Seed",
321
+ variable=self.reuse_seed_var,
322
+ text_color="black",
323
+ )
324
+ self.reuse_seed_checkbox.grid(
325
+ row=3, column=0, padx=(75, 5), pady=5, sticky="nsew"
326
+ )
327
+
328
+ # Multiscale preset dropdown
329
+ preset_names = [preset.name for preset in MULTISCALE_PRESETS.values()]
330
+ self.multiscale_preset_var = tk.StringVar(value="Quality")
331
+ self.multiscale_preset_dropdown = ctk.CTkOptionMenu(
332
+ self.checkbox_frame,
333
+ values=preset_names,
334
+ variable=self.multiscale_preset_var,
335
+ fg_color="#F5EFFF",
336
+ text_color="black",
337
+ command=self.on_preset_selected,
338
+ )
339
+ self.multiscale_preset_dropdown.grid(
340
+ row=4, column=0, columnspan=2, padx=5, pady=2, sticky="ew"
341
+ )
342
+
343
  # Button to launch the generation
344
  self.generate_button = ctk.CTkButton(
345
  self.sidebar,
 
476
  else "dpmpp_2m_cfgpp"
477
  )
478
 
479
+ def _get_last_seed(self) -> int:
480
+ """Get the last used seed from file."""
481
+ try:
482
+ with open(os.path.join("./_internal/", "last_seed.txt"), "r") as f:
483
+ return int(f.read().strip())
484
+ except (FileNotFoundError, ValueError):
485
+ return random.randint(1, 2**64)
486
+
487
+ def _save_last_seed(self, seed: int) -> None:
488
+ """Save the seed to file."""
489
+ try:
490
+ with open(os.path.join("./_internal/", "last_seed.txt"), "w") as f:
491
+ f.write(str(seed))
492
+ self.last_seed = seed
493
+ except Exception as e:
494
+ print(f"Error saving seed: {e}")
495
+
496
+ def _get_seed(self) -> int:
497
+ """Get seed based on reuse_seed checkbox."""
498
+ if self.reuse_seed_var.get():
499
+ return self.last_seed
500
+ else:
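+ # Each call with "Reuse Last Seed" unchecked draws and persists a fresh
+ # seed, so successive sampler calls within one generation use different
+ # seeds; when checked, the persisted seed is returned unchanged.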
501
+ new_seed = random.randint(1, 2**64)
502
+ self._save_last_seed(new_seed)
503
+ return new_seed
504
+
505
  def _img2img(self, file_path: str) -> None:
506
  """Perform img2img on the selected image.
507
 
 
586
  pass
587
  ultimatesdupscale_250 = ultimatesdupscale.upscale(
588
  upscale_by=2,
589
+ seed=self._get_seed(),
590
  steps=8,
591
  cfg=6,
592
  sampler_name=self.sampler,
 
646
  else:
647
  print("Adetailer is OFF")
648
 
649
+ def on_preset_selected(self, preset_name: str) -> None:
650
+ """Handle multiscale preset selection."""
651
+ # Find preset by name (case-insensitive)
652
+ preset = None
653
+ preset_key = None
654
+ for key, p in MULTISCALE_PRESETS.items():
655
+ if p.name.lower() == preset_name.lower():
656
+ preset = p
657
+ preset_key = key
658
+ break
659
+
660
+ if preset:
661
+ print(f"Selected multiscale preset: {preset_name}")
662
+ print(f" Factor: {preset.multiscale_factor}")
663
+ print(f" Start steps: {preset.multiscale_fullres_start}")
664
+ print(f" End steps: {preset.multiscale_fullres_end}")
665
+ print(f" Intermittent full-res: {preset.multiscale_intermittent_fullres}")
666
+
667
+ # Enable multiscale if not disabled preset
668
+ if preset_name.lower() != "disabled":
669
+ self.multiscale_var.set(True)
670
+ else:
671
+ self.multiscale_var.set(False)
672
+
673
+ def get_multiscale_params(self) -> dict:
674
+ """Get current multiscale parameters from selected preset."""
675
+ preset_name = self.multiscale_preset_var.get()
676
+
677
+ # Find preset by name (case-insensitive)
678
+ for key, preset in MULTISCALE_PRESETS.items():
679
+ if preset.name.lower() == preset_name.lower():
680
+ params = get_preset_parameters(key)
681
+ # Override enable_multiscale based on checkbox
682
+ params["enable_multiscale"] = self.multiscale_var.get()
683
+ return params
684
+
685
+ # Fallback to quality preset if not found
686
+ params = get_preset_parameters("quality")
687
+ params["enable_multiscale"] = self.multiscale_var.get()
688
+ return params
689
+
690
  def print_previewer(self) -> None:
691
  """Print the status of the previewer checkbox."""
692
  if self.previewer_var.get() is True:
 
865
  emptylatentimage_244 = emptylatentimage.generate(
866
  width=w, height=h, batch_size=int(self.batch_slider.get())
867
  )
868
+
869
+ # Get multiscale parameters from selected preset
870
+ multiscale_params = self.get_multiscale_params()
871
+
872
  ksampler_239 = ksampler_instance.sample(
873
+ seed=self._get_seed(),
874
  steps=20,
875
  cfg=cfg,
876
  sampler_name=self.sampler,
 
882
  positive=cliptextencode_242[0],
883
  negative=cliptextencode_243[0],
884
  latent_image=emptylatentimage_244[0],
885
+ **multiscale_params,
886
  )
887
  self.progress.set(0.4)
888
  if self.hires_fix_var.get() is True:
 
892
  samples=ksampler_239[0],
893
  )
894
  ksampler_253 = ksampler_instance.sample(
895
+ seed=self._get_seed(),
896
  steps=10,
897
  cfg=8,
898
  sampler_name="euler_ancestral_cfgpp",
 
904
  positive=cliptextencode_242[0],
905
  negative=cliptextencode_243[0],
906
  latent_image=latentupscale_254[0],
907
+ **multiscale_params,
908
  )
909
  vaedecode_240 = vaedecode.decode(
910
  samples=ksampler_253[0],
 
956
  guide_size=512,
957
  guide_size_for=False,
958
  max_size=768,
959
+ seed=self._get_seed(),
960
  steps=20,
961
  cfg=6.5,
962
  sampler_name=self.sampler,
 
1010
  guide_size=512,
1011
  guide_size_for=False,
1012
  max_size=768,
1013
+ seed=self._get_seed(),
1014
  steps=20,
1015
  cfg=6.5,
1016
  sampler_name=self.sampler,
 
1111
  # except ImportError:
1112
  # print("Triton not found, skipping compilation")
1113
  ksampler_3 = ksampler.sample(
1114
+ seed=self._get_seed(),
1115
  steps=20,
1116
  cfg=1,
1117
  sampler_name="euler_cfgpp",
modules/user/pipeline.py CHANGED
@@ -44,6 +44,13 @@ def pipeline(
44
  prio_speed: bool = False,
45
  autohdr: bool = False,
46
  realistic_model: bool = False,
 
 
 
 
 
 
 
47
  ) -> None:
48
  """#### Run the LightDiffusion pipeline.
49
 
@@ -61,8 +68,29 @@ def pipeline(
61
  - `prio_speed` (bool, optional): Prioritize speed over quality. Defaults to False.
62
  - `autohdr` (bool, optional): Enable the AutoHDR mode. Defaults to False.
63
  - `realistic_model` (bool, optional): Use the realistic model. Defaults to False.
 
 
 
 
 
 
64
  """
65
  global last_seed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  if reuse_seed:
67
  seed = last_seed
68
 
@@ -288,6 +316,7 @@ def pipeline(
288
  # applystablefast_158, "diffusion_model", 0.120
289
  # )
290
 
 
291
  ksampler_239 = ksampler_instance.sample(
292
  seed=seed,
293
  steps=20,
@@ -302,6 +331,11 @@ def pipeline(
302
  positive=cliptextencode_242[0],
303
  negative=cliptextencode_243[0],
304
  latent_image=emptylatentimage_244[0],
 
 
 
 
 
305
  )
306
  if hires_fix:
307
  latentupscale_254 = latent_upscale.upscale(
@@ -537,6 +571,41 @@ if __name__ == "__main__":
537
  action="store_true",
538
  help="Use the realistic model.",
539
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  args = parser.parse_args()
541
 
542
  pipeline(
@@ -555,4 +624,10 @@ if __name__ == "__main__":
555
  args.prio_speed,
556
  args.autohdr,
557
  args.realistic_model,
 
 
 
 
 
 
558
  )
 
44
  prio_speed: bool = False,
45
  autohdr: bool = False,
46
  realistic_model: bool = False,
47
+ # Multi-scale diffusion parameters
48
+ multiscale_preset: str = None,
49
+ enable_multiscale: bool = True,
50
+ multiscale_factor: float = 0.5,
51
+ multiscale_fullres_start: int = 3,
52
+ multiscale_fullres_end: int = 8,
53
+ multiscale_intermittent_fullres: bool = False,
54
  ) -> None:
55
  """#### Run the LightDiffusion pipeline.
56
 
 
68
  - `prio_speed` (bool, optional): Prioritize speed over quality. Defaults to False.
69
  - `autohdr` (bool, optional): Enable the AutoHDR mode. Defaults to False.
70
  - `realistic_model` (bool, optional): Use the realistic model. Defaults to False.
71
+ - `multiscale_preset` (str, optional): Predefined multiscale preset ('quality', 'performance', 'balanced', 'disabled'). Overrides individual multiscale parameters. Defaults to None.
72
+ - `enable_multiscale` (bool, optional): Enable multi-scale diffusion for performance optimization. Defaults to True.
73
+ - `multiscale_factor` (float, optional): Scale factor for intermediate steps (0.1-1.0). Defaults to 0.5.
74
+ - `multiscale_fullres_start` (int, optional): Number of first steps at full resolution. Defaults to 3.
75
+ - `multiscale_fullres_end` (int, optional): Number of last steps at full resolution. Defaults to 8.
76
+ - `multiscale_intermittent_fullres` (bool, optional): Enable intermittent full-res rendering in low-res region. Defaults to False.
77
  """
78
  global last_seed
79
+
80
+ # Apply multiscale preset if specified (overrides individual parameters)
81
+ if multiscale_preset is not None:
82
+ from modules.sample.multiscale_presets import get_preset_parameters
83
+
84
+ preset_params = get_preset_parameters(multiscale_preset)
85
+ enable_multiscale = preset_params["enable_multiscale"]
86
+ multiscale_factor = preset_params["multiscale_factor"]
87
+ multiscale_fullres_start = preset_params["multiscale_fullres_start"]
88
+ multiscale_fullres_end = preset_params["multiscale_fullres_end"]
89
+ multiscale_intermittent_fullres = preset_params[
90
+ "multiscale_intermittent_fullres"
91
+ ]
92
+ print(f"Applied multiscale preset: {multiscale_preset}")
93
+
94
  if reuse_seed:
95
  seed = last_seed
96
 
 
316
  # applystablefast_158, "diffusion_model", 0.120
317
  # )
318
 
319
+ # Create sampler with multi-scale options
320
  ksampler_239 = ksampler_instance.sample(
321
  seed=seed,
322
  steps=20,
 
331
  positive=cliptextencode_242[0],
332
  negative=cliptextencode_243[0],
333
  latent_image=emptylatentimage_244[0],
334
+ enable_multiscale=enable_multiscale,
335
+ multiscale_factor=multiscale_factor,
336
+ multiscale_fullres_start=multiscale_fullres_start,
337
+ multiscale_fullres_end=multiscale_fullres_end,
338
+ multiscale_intermittent_fullres=multiscale_intermittent_fullres,
339
  )
340
  if hires_fix:
341
  latentupscale_254 = latent_upscale.upscale(
 
571
  action="store_true",
572
  help="Use the realistic model.",
573
  )
574
+ parser.add_argument(
575
+ "--multiscale-preset",
576
+ type=str,
577
+ choices=["quality", "performance", "balanced", "disabled"],
578
+ help="Predefined multiscale preset ('quality', 'performance', 'balanced', 'disabled'). Overrides individual multiscale parameters.",
579
+ )
580
+ parser.add_argument(
581
+ "--enable-multiscale",
582
+ action="store_true",
583
+ default=True,
584
+ help="Enable multi-scale diffusion for performance optimization.",
585
+ )
586
+ parser.add_argument(
587
+ "--multiscale-factor",
588
+ type=float,
589
+ default=0.5,
590
+ help="Scale factor for intermediate steps (0.1-1.0).",
591
+ )
592
+ parser.add_argument(
593
+ "--multiscale-fullres-start",
594
+ type=int,
595
+ default=3,
596
+ help="Number of first steps at full resolution.",
597
+ )
598
+ parser.add_argument(
599
+ "--multiscale-fullres-end",
600
+ type=int,
601
+ default=8,
602
+ help="Number of last steps at full resolution.",
603
+ )
604
+ parser.add_argument(
605
+ "--multiscale-intermittent-fullres",
606
+ action="store_true",
607
+ help="Enable intermittent full-res rendering in low-res region.",
608
+ )
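+ # Illustrative usage: appending "--multiscale-preset performance" to the
+ # usual command line applies that preset and overrides the individual
+ # --multiscale-* flags defined above.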
609
  args = parser.parse_args()
610
 
611
  pipeline(
 
624
  args.prio_speed,
625
  args.autohdr,
626
  args.realistic_model,
627
+ args.multiscale_preset,
628
+ args.enable_multiscale,
629
+ args.multiscale_factor,
630
+ args.multiscale_fullres_start,
631
+ args.multiscale_fullres_end,
632
+ args.multiscale_intermittent_fullres,
633
  )
pipeline.bat CHANGED
@@ -24,7 +24,7 @@ FOR /F "delims=" %%i IN ('nvidia-smi 2^>^&1') DO (
24
  )
25
  IF NOT ERRORLEVEL 1 (
26
  echo NVIDIA GPU detected, installing GPU dependencies...
27
- uv pip install xformers torch torchvision --index-url https://download.pytorch.org/whl/cu126
28
  ) ELSE (
29
  echo No NVIDIA GPU detected, installing CPU dependencies...
30
  uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
 
24
  )
25
  IF NOT ERRORLEVEL 1 (
26
  echo NVIDIA GPU detected, installing GPU dependencies...
27
+ uv pip install xformers torch torchvision --index-url https://download.pytorch.org/whl/cu128
28
  ) ELSE (
29
  echo No NVIDIA GPU detected, installing CPU dependencies...
30
  uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
run.bat CHANGED
@@ -24,7 +24,7 @@ FOR /F "delims=" %%i IN ('nvidia-smi 2^>^&1') DO (
24
  )
25
  IF NOT ERRORLEVEL 1 (
26
  echo NVIDIA GPU detected, installing GPU dependencies...
27
- uv pip install xformers torch torchvision --index-url https://download.pytorch.org/whl/cu126
28
  ) ELSE (
29
  echo No NVIDIA GPU detected, installing CPU dependencies...
30
  uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
 
24
  )
25
  IF NOT ERRORLEVEL 1 (
26
  echo NVIDIA GPU detected, installing GPU dependencies...
27
+ uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
28
  ) ELSE (
29
  echo No NVIDIA GPU detected, installing CPU dependencies...
30
  uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
run_web.bat CHANGED
@@ -24,7 +24,7 @@ FOR /F "delims=" %%i IN ('nvidia-smi 2^>^&1') DO (
24
  )
25
  IF NOT ERRORLEVEL 1 (
26
  echo NVIDIA GPU detected, installing GPU dependencies...
27
- uv pip install xformers torch torchvision --index-url https://download.pytorch.org/whl/cu126
28
  ) ELSE (
29
  echo No NVIDIA GPU detected, installing CPU dependencies...
30
  uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
 
24
  )
25
  IF NOT ERRORLEVEL 1 (
26
  echo NVIDIA GPU detected, installing GPU dependencies...
27
+ uv pip install xformers torch torchvision --index-url https://download.pytorch.org/whl/cu128
28
  ) ELSE (
29
  echo No NVIDIA GPU detected, installing CPU dependencies...
30
  uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu