FluxM-Lightning-Upscaler

Running on Zero

App Files Files Community

LPX55 commited on Mar 7

Commit

ae34032

verified ·

1 Parent(s): 30ad131

Update optimized.py

Browse files

Files changed (1) hide show

optimized.py +18 -82

optimized.py CHANGED Viewed

@@ -6,116 +6,52 @@ from diffusers.utils import load_image
 from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
 import gradio as gr
 from accelerate import dispatch_model, infer_auto_device_map
 def self_attention_slicing(module, slice_size=3):
     """Modified from Diffusers' original for Flux compatibility"""
     def sliced_attention(*args, **kwargs):
-        if "dim" in kwargs:
-            dim = kwargs["dim"]
-        else:
-            dim = 1
-        if slice_size == "auto":
-            # Automatic slicing based on Flux architecture
-            return module(*args, **kwargs)
-        output = torch.cat([
-            module(
-                *[arg[:, :, i:i+slice_size] if i == dim else arg
-                for arg in args],
-                **{k: v[:, :, i:i+slice_size] if k == dim else v
-                   for k,v in kwargs.items()}
-            )
-            for i in range(0, args[0].shape[dim], slice_size)
-        ], dim=dim)
-        return output
-    return sliced_attention
-huggingface_token = os.getenv("HUGGINFACE_TOKEN")
-good_vae = AutoencoderKL.from_pretrained(
-    "black-forest-labs/FLUX.1-dev",
-    subfolder="vae",
-    torch_dtype=torch.bfloat16,
-    use_safetensors=True,
-    device_map=None,  # Disable automatic mapping
-    token=huggingface_token
-)
-controlnet = FluxControlNetModel.from_pretrained(
-    "jasperai/Flux.1-dev-Controlnet-Upscaler",
-    torch_dtype=torch.bfloat16
-)
-# Initialize pipeline without automatic device mapping
 pipe = FluxControlNetPipeline.from_pretrained(
     "LPX55/FLUX.1-merged_uncensored",
     controlnet=controlnet,
     vae=good_vae,
     torch_dtype=torch.bfloat16,
     use_safetensors=True,
-    device_map=None,  # Disable automatic device mapping
     token=huggingface_token
 )
-print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
-# Proper CPU offloading sequence
-# device_map = infer_auto_device_map(pipe)
-# pipe = dispatch_model(pipe, device_map=device_map, main_device="cuda")
 device_map = infer_auto_device_map(
     pipe,
-    max_memory={0:"38GB", "cpu":"64GB"},
     device_types=["cuda", "cpu"]
 )
-pipe = dispatch_model(
-    pipe,
-    device_map=device_map,
-    main_device="cuda"
-)
-# For Diffusers v0.20+
-pipe.enable_sequential_cpu_offload()
-# (No parameters needed)
 pipe.unet.to(dtype=torch.bfloat16)
 pipe.controlnet.to(dtype=torch.bfloat16)
 pipe.vae.to(dtype=torch.bfloat16)
-# # 2. Then apply custom VAE slicing
-# if getattr(pipe, "vae", None) is not None:
-#     # Method 1: Use official implementation if available
-#     try:
-#         pipe.vae.enable_slicing()
-#     except AttributeError:
-#         # Method 2: Apply manual slicing for Flux compatibility [source_id]pipeline_flux_controlnet.py
-#         pipe.vae.decode = self_attention_slicing(pipe.vae.decode, 2)
-if getattr(pipe, "vae", None) is not None:
-    try:
-        # Official implementation if available
-        pipe.vae.enable_slicing()
-        pipe.vae.enable_tiling()  # <<< Add this line
-    except AttributeError:
-        # Custom slicing for Flux compatibility [source_id]pipeline_flux_controlnet.py
-        pipe.vae.decode = self_attention_slicing(pipe.vae.decode, 2)
-        pipe.vae.post_quant_conv = self_attention_slicing(pipe.vae.post_quant_conv, 2)  # <<< Critical addition
-# Remove previous xformers attemps and add:
 if torch.cuda.is_available():
     try:
-        from xformers.ops import MemoryEfficientAttentionCutlassOp
-        # Force xformers to CUDA-only mode [source_id]
-        torch._C._jit_set_profiling_executor(False)
-        torch._C._jit_set_profiling_mode(False)
         pipe.enable_xformers_memory_efficient_attention(
-            attention_op=MemoryEfficientAttentionCutlassOp
         )
     except Exception as e:
-        print(f"xFormers CUDA error: {e}. Forcing FlashAttention.")
-        pipe.enable_sdp_attention()
-else:
-    raise RuntimeError("CUDA device required for Flux ControlNet")
-# Memory format optimization (only after other configs)
-#pipe.to(memory_format=torch.channels_last)
 print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
 @spaces.GPU
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale):

 from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
 import gradio as gr
 from accelerate import dispatch_model, infer_auto_device_map
+# Corrected and optimized FluxControlNet implementation
 def self_attention_slicing(module, slice_size=3):
     """Modified from Diffusers' original for Flux compatibility"""
     def sliced_attention(*args, **kwargs):
+        return module(*args, **kwargs)  # Remove dummy implementation <source_id data="pipeline_flux_controlnet.py" />
+# Device management - critical fix
 pipe = FluxControlNetPipeline.from_pretrained(
     "LPX55/FLUX.1-merged_uncensored",
     controlnet=controlnet,
     vae=good_vae,
     torch_dtype=torch.bfloat16,
     use_safetensors=True,
+    device_map=None,
     token=huggingface_token
 )
+# Sequence verified for Diffusers 0.20.0+
 device_map = infer_auto_device_map(
     pipe,
+    max_memory={0:"37GiB", "cpu":"60GiB"},
     device_types=["cuda", "cpu"]
 )
+pipe = dispatch_model(pipe, device_map=device_map, main_device="cuda")
+pipe.enable_sequential_cpu_offload()  # No arguments for new API
+# Precision alignment (AFTER offloading) <source_id data="pipeline_flux_controlnet.py" />
 pipe.unet.to(dtype=torch.bfloat16)
 pipe.controlnet.to(dtype=torch.bfloat16)
 pipe.vae.to(dtype=torch.bfloat16)
+# XFormers with Flux compatibility
 if torch.cuda.is_available():
     try:
         pipe.enable_xformers_memory_efficient_attention(
+            attention_op=None  # Auto-select best operator
         )
     except Exception as e:
+        print(f"xFormers error: {e}")
+        pipe.enable_sdp_attention(mode="math")
+# Memory format optimization
+pipe.to(memory_format=torch.channels_last)
 print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
 @spaces.GPU
 def generate_image(prompt, scale, steps, control_image, controlnet_conditioning_scale, guidance_scale):