AbstractPhil committed
Commit d3479d5 · 1 Parent(s): c557c56

local project created to properly edit and debug

__init__.py ADDED
File without changes
app.py CHANGED
@@ -1,56 +1,49 @@
1
  # app.py ────────────────────────────────────────────────────────────────
2
  import io, warnings, numpy as np, matplotlib.pyplot as plt
 
 
3
  from pathlib import Path
4
- from typing import Dict, List, Optional, Tuple
5
 
6
  import gradio as gr
7
- import torch, torch.nn.functional as F
8
- from PIL import Image
 
9
  from transformers import T5Tokenizer, T5EncoderModel
10
- from diffusers import (
11
- StableDiffusionXLPipeline,
12
- DDIMScheduler, EulerDiscreteScheduler, DPMSolverMultistepScheduler,
13
- )
14
  from huggingface_hub import hf_hub_download
15
  from safetensors.torch import load_file
16
 
17
- # local modules
18
  from two_stream_shunt_adapter import TwoStreamShuntAdapter
19
  from conditioning_shifter import ConditioningShifter, ShiftConfig, AdapterOutput
20
- from configs import T5_SHUNT_REPOS
21
 
22
  warnings.filterwarnings("ignore")
23
 
24
- # ─── GLOBALS ────────────────────────────────────────────────────────────
25
- dtype = torch.float16
26
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
 
28
-
29
- _t5_tok: Optional[T5Tokenizer] = None
30
- _t5_mod: Optional[T5EncoderModel] = None
31
- _pipe : Optional[StableDiffusionXLPipeline] = None
32
 
33
  SCHEDULERS = {
34
  "DPM++ 2M": DPMSolverMultistepScheduler,
35
- "DDIM": DDIMScheduler,
36
- "Euler": EulerDiscreteScheduler,
37
  }
38
 
39
- # adapter-meta from configs.py
40
- clip_l_opts = T5_SHUNT_REPOS["clip_l"]["shunts_available"]["shunt_list"]
41
- clip_g_opts = T5_SHUNT_REPOS["clip_g"]["shunts_available"]["shunt_list"]
42
- repo_l, conf_l = T5_SHUNT_REPOS["clip_l"]["repo"], T5_SHUNT_REPOS["clip_l"]["config"]
43
- repo_g, conf_g = T5_SHUNT_REPOS["clip_g"]["repo"], T5_SHUNT_REPOS["clip_g"]["config"]
44
 
45
-
46
- # ─── INITIALISERS ────────────────────────────────────────────────────────
47
  def _init_t5():
48
  global _t5_tok, _t5_mod
49
  if _t5_tok is None:
50
  _t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
51
- _t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base") \
52
- .to(device).eval()
53
-
54
 
55
  def _init_pipe():
56
  global _pipe
@@ -61,16 +54,15 @@ def _init_pipe():
61
  ).to(device)
62
  _pipe.enable_xformers_memory_efficient_attention()
63
 
64
-
65
- # ─── HELPERS ─────────────────────────────────────────────────────────────
66
- def load_adapter(repo: str, filename: str, cfg: dict,
67
- device: torch.device) -> TwoStreamShuntAdapter:
68
- path = hf_hub_download(repo_id=repo, filename=filename)
69
- model = TwoStreamShuntAdapter(cfg).eval()
70
  model.load_state_dict(load_file(path))
71
  return model.to(device)
72
 
73
-
74
  def plot_heat(mat: torch.Tensor | np.ndarray, title: str) -> np.ndarray:
75
  if isinstance(mat, torch.Tensor):
76
  mat = mat.detach().cpu().numpy()
@@ -90,34 +82,25 @@ def plot_heat(mat: torch.Tensor | np.ndarray, title: str) -> np.ndarray:
90
  plt.close(); buf.seek(0)
91
  return np.array(Image.open(buf))
92
 
93
-
94
  def encode_prompt_xl(pipe, prompt: str, negative: str) -> Dict[str, torch.Tensor]:
95
- tok_l = pipe.tokenizer (prompt, max_length=77, truncation=True,
96
- padding="max_length", return_tensors="pt").input_ids.to(device)
97
- tok_g = pipe.tokenizer_2(prompt, max_length=77, truncation=True,
98
- padding="max_length", return_tensors="pt").input_ids.to(device)
99
- ntok_l = pipe.tokenizer (negative,max_length=77, truncation=True,
100
- padding="max_length", return_tensors="pt").input_ids.to(device)
101
- ntok_g = pipe.tokenizer_2(negative,max_length=77, truncation=True,
102
- padding="max_length", return_tensors="pt").input_ids.to(device)
103
 
104
  with torch.no_grad():
105
- clip_l = pipe.text_encoder(tok_l)[0]
106
- neg_clip_l = pipe.text_encoder(ntok_l)[0]
107
-
108
- g_out = pipe.text_encoder_2(tok_g, output_hidden_states=False)
109
- clip_g, pl = g_out[1], g_out[0]
110
- ng_out = pipe.text_encoder_2(ntok_g, output_hidden_states=False)
111
  neg_clip_g, npl = ng_out[1], ng_out[0]
112
 
113
- return {"clip_l": clip_l, "clip_g": clip_g,
114
- "neg_l": neg_clip_l, "neg_g": neg_clip_g,
115
- "pooled": pl, "neg_pooled": npl}
116
-
117
 
118
  # ─── INFERENCE ───────────────────────────────────────────────────────────
119
  def infer(prompt: str, negative_prompt: str,
120
- adapter_l_file: str, adapter_g_file: str,
121
  strength: float, delta_scale: float, sigma_scale: float,
122
  gpred_scale: float, noise: float, gate_prob: float, use_anchor: bool,
123
  steps: int, cfg_scale: float, scheduler_name: str,
@@ -129,91 +112,76 @@ def infer(prompt: str, negative_prompt: str,
129
  if scheduler_name in SCHEDULERS:
130
  _pipe.scheduler = SCHEDULERS[scheduler_name].from_config(_pipe.scheduler.config)
131
 
132
- generator = (torch.Generator(device=device).manual_seed(seed)
133
- if seed != -1 else None)
134
 
135
- # build ShiftConfig (one per request)
136
  cfg_shift = ShiftConfig(
137
- prompt = prompt,
138
- seed = seed,
139
- strength = strength,
140
- delta_scale = delta_scale,
141
- sigma_scale = sigma_scale,
142
- gate_probability = gate_prob,
143
- noise_injection = noise,
144
- use_anchor = use_anchor,
145
- guidance_scale = gpred_scale,
146
  )
147
 
148
- # encoder (T5) embeddings
149
  t5_seq = ConditioningShifter.extract_encoder_embeddings(
150
  {"tokenizer": _t5_tok, "model": _t5_mod, "config": {"config": {}}},
151
  device, cfg_shift
152
  )
153
 
154
- # CLIP embeddings
155
  embeds = encode_prompt_xl(_pipe, prompt, negative_prompt)
156
-
157
- # run adapters --------------------------------------------------------
158
  outputs: List[AdapterOutput] = []
159
- if adapter_l_file and adapter_l_file != "None":
160
- ada_l = load_adapter(repo_l, adapter_l_file, conf_l, device)
 
161
  outputs.append(ConditioningShifter.run_adapter(
162
  ada_l, t5_seq, embeds["clip_l"],
163
  cfg_shift.guidance_scale, "clip_l", (0, 768)))
164
 
165
- if adapter_g_file and adapter_g_file != "None":
166
- ada_g = load_adapter(repo_g, adapter_g_file, conf_g, device)
167
  outputs.append(ConditioningShifter.run_adapter(
168
  ada_g, t5_seq, embeds["clip_g"],
169
  cfg_shift.guidance_scale, "clip_g", (768, 2048)))
170
 
171
- # apply modifications -------------------------------------------------
172
  clip_l_mod, clip_g_mod = embeds["clip_l"], embeds["clip_g"]
173
- delta_viz = {"clip_l": torch.zeros_like(clip_l_mod),
174
- "clip_g": torch.zeros_like(clip_g_mod)}
175
- gate_viz = {"clip_l": torch.zeros_like(clip_l_mod[..., :1]),
176
- "clip_g": torch.zeros_like(clip_g_mod[..., :1])}
177
 
178
  for out in outputs:
179
  target = clip_l_mod if out.adapter_type == "clip_l" else clip_g_mod
180
- mod = ConditioningShifter.apply_modifications(target, [out], cfg_shift)
181
- if out.adapter_type == "clip_l":
182
- clip_l_mod = mod
183
- else:
184
- clip_g_mod = mod
185
  delta_viz[out.adapter_type] = out.delta.detach()
186
- gate_viz [out.adapter_type] = out.gate.detach()
187
 
188
- # prepare for SDXL ----------------------------------------------------
189
  prompt_embeds = torch.cat([clip_l_mod, clip_g_mod], dim=-1)
190
- neg_embeds = torch.cat([embeds["neg_l"], embeds["neg_g"]], dim=-1)
191
 
192
  image = _pipe(
193
- prompt_embeds = prompt_embeds,
194
- negative_prompt_embeds = neg_embeds,
195
- pooled_prompt_embeds = embeds["pooled"],
196
- negative_pooled_prompt_embeds = embeds["neg_pooled"],
197
- num_inference_steps = steps,
198
- guidance_scale = cfg_scale,
199
- width = width, height = height, generator = generator
200
  ).images[0]
201
 
202
- # diagnostics ---------------------------------------------------------
203
  delta_l_img = plot_heat(delta_viz["clip_l"].squeeze(), "Δ CLIP-L")
204
- gate_l_img = plot_heat(gate_viz ["clip_l"].squeeze().mean(-1, keepdims=True), "Gate L")
205
  delta_g_img = plot_heat(delta_viz["clip_g"].squeeze(), "Δ CLIP-G")
206
- gate_g_img = plot_heat(gate_viz ["clip_g"].squeeze().mean(-1, keepdims=True), "Gate G")
207
 
208
- stats_l = (f"τ̄_L = {outputs[0].tau.mean().item():.3f}"
209
- if outputs and outputs[0].adapter_type == "clip_l" else "-")
210
- stats_g = (f"τ̄_G = {outputs[-1].tau.mean().item():.3f}"
211
- if len(outputs) > 1 and outputs[-1].adapter_type == "clip_g" else "-")
212
 
213
  return image, delta_l_img, gate_l_img, delta_g_img, gate_g_img, stats_l, stats_g
214
 
215
-
216
- # ─── GRADIO UI ────────────────────────────────────────────────────────────
217
  def create_interface():
218
  with gr.Blocks(title="SDXL Dual-Shunt Tester", theme=gr.themes.Soft()) as demo:
219
  gr.Markdown("# 🧠 SDXL Dual-Shunt Tester")
@@ -221,18 +189,12 @@ def create_interface():
221
  with gr.Row():
222
  with gr.Column(scale=1):
223
  gr.Markdown("### Prompts")
224
- prompt = gr.Textbox(label="Prompt", lines=3,
225
- value="a futuristic control station with holographic displays")
226
- negative = gr.Textbox(label="Negative", lines=2,
227
- value="blurry, low quality, distorted")
228
 
229
  gr.Markdown("### Adapters")
230
- adapter_l = gr.Dropdown(["None"] + clip_l_opts,
231
- value="t5-vit-l-14-dual_shunt_caption.safetensors",
232
- label="CLIP-L Adapter")
233
- adapter_g = gr.Dropdown(["None"] + clip_g_opts,
234
- value="dual_shunt_omega_no_caption_noised_e1_step_10000.safetensors",
235
- label="CLIP-G Adapter")
236
 
237
  gr.Markdown("### Adapter Controls")
238
  strength = gr.Slider(0, 10, 4.0, 0.05, label="Strength")
@@ -247,8 +209,7 @@ def create_interface():
247
  with gr.Row():
248
  steps = gr.Slider(1, 50, 20, 1, label="Steps")
249
  cfg_scale = gr.Slider(1, 15, 7.5, 0.1, label="CFG")
250
- scheduler = gr.Dropdown(list(SCHEDULERS.keys()),
251
- value="DPM++ 2M", label="Scheduler")
252
  with gr.Row():
253
  width = gr.Slider(512, 1536, 1024, 64, label="Width")
254
  height = gr.Slider(512, 1536, 1024, 64, label="Height")
@@ -266,13 +227,8 @@ def create_interface():
266
  stats_l = gr.Textbox(label="Stats L", interactive=False)
267
  stats_g = gr.Textbox(label="Stats G", interactive=False)
268
 
269
- def _run(*args):
270
- pl, npl = args[0], args[1]
271
- al, ag = (None if v == "None" else v for v in args[2:4])
272
- return infer(pl, npl, al, ag, *args[4:])
273
-
274
  run_btn.click(
275
- fn=_run,
276
  inputs=[prompt, negative, adapter_l, adapter_g, strength, delta_scale,
277
  sigma_scale, gpred_scale, noise, gate_prob, use_anchor, steps,
278
  cfg_scale, scheduler, width, height, seed],
@@ -280,6 +236,5 @@ def create_interface():
280
  )
281
  return demo
282
 
283
-
284
  if __name__ == "__main__":
285
  create_interface().launch()
 
1
  # app.py ────────────────────────────────────────────────────────────────
2
  import io, warnings, numpy as np, matplotlib.pyplot as plt
3
+ from typing import Dict, List, Optional
4
+ from PIL import Image
5
  from pathlib import Path
 
6
 
7
  import gradio as gr
8
+ import torch
9
+ import torch.nn.functional as F
10
+
11
  from transformers import T5Tokenizer, T5EncoderModel
12
+ from diffusers import StableDiffusionXLPipeline, DDIMScheduler, EulerDiscreteScheduler, DPMSolverMultistepScheduler
13
  from huggingface_hub import hf_hub_download
14
  from safetensors.torch import load_file
15
 
 
16
  from two_stream_shunt_adapter import TwoStreamShuntAdapter
17
  from conditioning_shifter import ConditioningShifter, ShiftConfig, AdapterOutput
18
+ from configs import ShuntUtil
19
 
20
  warnings.filterwarnings("ignore")
21
 
22
+ # ─── GLOBALS ─────────────────────────────────────────────────────────────
23
+ dtype = torch.float16
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
 
26
+ _t5_tok: Optional[T5Tokenizer] = None
27
+ _t5_mod: Optional[T5EncoderModel] = None
28
+ _pipe: Optional[StableDiffusionXLPipeline] = None
 
29
 
30
  SCHEDULERS = {
31
  "DPM++ 2M": DPMSolverMultistepScheduler,
32
+ "DDIM": DDIMScheduler,
33
+ "Euler": EulerDiscreteScheduler,
34
  }
35
 
36
+ clip_l_shunts = ShuntUtil.get_shunts_by_clip_type("clip_l")
37
+ clip_g_shunts = ShuntUtil.get_shunts_by_clip_type("clip_g")
38
+ clip_l_opts = ["None"] + [s.name for s in clip_l_shunts]
39
+ clip_g_opts = ["None"] + [s.name for s in clip_g_shunts]
 
40
 
41
+ # ─── INIT ───────────────────────────────────────────────────────────────
 
42
  def _init_t5():
43
  global _t5_tok, _t5_mod
44
  if _t5_tok is None:
45
  _t5_tok = T5Tokenizer.from_pretrained("google/flan-t5-base")
46
+ _t5_mod = T5EncoderModel.from_pretrained("google/flan-t5-base").to(device).eval()
 
 
47
 
48
  def _init_pipe():
49
  global _pipe
 
54
  ).to(device)
55
  _pipe.enable_xformers_memory_efficient_attention()
56
 
57
+ # ─── UTILITY ────────────────────────────────────────────────────────────
58
+ def load_adapter_by_name(name: str, device: torch.device) -> TwoStreamShuntAdapter:
59
+ shunt = ShuntUtil.get_shunt_by_name(name)
60
+ assert shunt, f"Shunt '{name}' not found."
61
+ path = hf_hub_download(repo_id=shunt.repo, filename=shunt.file)
62
+ model = TwoStreamShuntAdapter(shunt.config).eval()
63
  model.load_state_dict(load_file(path))
64
  return model.to(device)
65
 
 
66
  def plot_heat(mat: torch.Tensor | np.ndarray, title: str) -> np.ndarray:
67
  if isinstance(mat, torch.Tensor):
68
  mat = mat.detach().cpu().numpy()
 
82
  plt.close(); buf.seek(0)
83
  return np.array(Image.open(buf))
84
 
 
85
  def encode_prompt_xl(pipe, prompt: str, negative: str) -> Dict[str, torch.Tensor]:
86
+ tok_l = pipe.tokenizer(prompt, max_length=77, truncation=True, padding="max_length", return_tensors="pt").input_ids.to(device)
87
+ tok_g = pipe.tokenizer_2(prompt, max_length=77, truncation=True, padding="max_length", return_tensors="pt").input_ids.to(device)
88
+ ntok_l = pipe.tokenizer(negative, max_length=77, truncation=True, padding="max_length", return_tensors="pt").input_ids.to(device)
89
+ ntok_g = pipe.tokenizer_2(negative, max_length=77, truncation=True, padding="max_length", return_tensors="pt").input_ids.to(device)
 
90
 
91
  with torch.no_grad():
92
+ clip_l = pipe.text_encoder(tok_l)[0]
93
+ neg_clip_l = pipe.text_encoder(ntok_l)[0]
94
+ g_out = pipe.text_encoder_2(tok_g, output_hidden_states=False)
95
+ clip_g, pl = g_out[1], g_out[0]
96
+ ng_out = pipe.text_encoder_2(ntok_g, output_hidden_states=False)
 
97
  neg_clip_g, npl = ng_out[1], ng_out[0]
98
 
99
+ return {"clip_l": clip_l, "clip_g": clip_g, "neg_l": neg_clip_l, "neg_g": neg_clip_g, "pooled": pl, "neg_pooled": npl}
 
 
 
100
 
101
  # ─── INFERENCE ───────────────────────────────────────────────────────────
102
  def infer(prompt: str, negative_prompt: str,
103
+ adapter_l_name: str, adapter_g_name: str,
104
  strength: float, delta_scale: float, sigma_scale: float,
105
  gpred_scale: float, noise: float, gate_prob: float, use_anchor: bool,
106
  steps: int, cfg_scale: float, scheduler_name: str,
 
112
  if scheduler_name in SCHEDULERS:
113
  _pipe.scheduler = SCHEDULERS[scheduler_name].from_config(_pipe.scheduler.config)
114
 
115
+ generator = (torch.Generator(device=device).manual_seed(seed) if seed != -1 else None)
 
116
 
 
117
  cfg_shift = ShiftConfig(
118
+ prompt=prompt,
119
+ seed=seed,
120
+ strength=strength,
121
+ delta_scale=delta_scale,
122
+ sigma_scale=sigma_scale,
123
+ gate_probability=gate_prob,
124
+ noise_injection=noise,
125
+ use_anchor=use_anchor,
126
+ guidance_scale=gpred_scale,
127
  )
128
 
 
129
  t5_seq = ConditioningShifter.extract_encoder_embeddings(
130
  {"tokenizer": _t5_tok, "model": _t5_mod, "config": {"config": {}}},
131
  device, cfg_shift
132
  )
133
 
 
134
  embeds = encode_prompt_xl(_pipe, prompt, negative_prompt)
 
 
135
  outputs: List[AdapterOutput] = []
136
+
137
+ if adapter_l_name and adapter_l_name != "None":
138
+ ada_l = load_adapter_by_name(adapter_l_name, device)
139
  outputs.append(ConditioningShifter.run_adapter(
140
  ada_l, t5_seq, embeds["clip_l"],
141
  cfg_shift.guidance_scale, "clip_l", (0, 768)))
142
 
143
+ if adapter_g_name and adapter_g_name != "None":
144
+ ada_g = load_adapter_by_name(adapter_g_name, device)
145
  outputs.append(ConditioningShifter.run_adapter(
146
  ada_g, t5_seq, embeds["clip_g"],
147
  cfg_shift.guidance_scale, "clip_g", (768, 2048)))
148
 
 
149
  clip_l_mod, clip_g_mod = embeds["clip_l"], embeds["clip_g"]
150
+ delta_viz = {"clip_l": torch.zeros_like(clip_l_mod), "clip_g": torch.zeros_like(clip_g_mod)}
151
+ gate_viz = {"clip_l": torch.zeros_like(clip_l_mod[..., :1]), "clip_g": torch.zeros_like(clip_g_mod[..., :1])}
 
 
152
 
153
  for out in outputs:
154
  target = clip_l_mod if out.adapter_type == "clip_l" else clip_g_mod
155
+ mod = ConditioningShifter.apply_modifications(target, [out], cfg_shift)
156
+ if out.adapter_type == "clip_l": clip_l_mod = mod
157
+ else: clip_g_mod = mod
 
 
158
  delta_viz[out.adapter_type] = out.delta.detach()
159
+ gate_viz[out.adapter_type] = out.gate.detach()
160
 
 
161
  prompt_embeds = torch.cat([clip_l_mod, clip_g_mod], dim=-1)
162
+ neg_embeds = torch.cat([embeds["neg_l"], embeds["neg_g"]], dim=-1)
163
 
164
  image = _pipe(
165
+ prompt_embeds=prompt_embeds,
166
+ negative_prompt_embeds=neg_embeds,
167
+ pooled_prompt_embeds=embeds["pooled"],
168
+ negative_pooled_prompt_embeds=embeds["neg_pooled"],
169
+ num_inference_steps=steps,
170
+ guidance_scale=cfg_scale,
171
+ width=width, height=height, generator=generator
172
  ).images[0]
173
 
 
174
  delta_l_img = plot_heat(delta_viz["clip_l"].squeeze(), "Δ CLIP-L")
175
+ gate_l_img = plot_heat(gate_viz["clip_l"].squeeze().mean(-1, keepdims=True), "Gate L")
176
  delta_g_img = plot_heat(delta_viz["clip_g"].squeeze(), "Δ CLIP-G")
177
+ gate_g_img = plot_heat(gate_viz["clip_g"].squeeze().mean(-1, keepdims=True), "Gate G")
178
 
179
+ stats_l = (f"τ̄_L = {outputs[0].tau.mean().item():.3f}" if outputs and outputs[0].adapter_type == "clip_l" else "-")
180
+ stats_g = (f"τ̄_G = {outputs[-1].tau.mean().item():.3f}" if len(outputs) > 1 and outputs[-1].adapter_type == "clip_g" else "-")
 
 
181
 
182
  return image, delta_l_img, gate_l_img, delta_g_img, gate_g_img, stats_l, stats_g
183
 
184
+ # ─── GRADIO UI ───────────────────────────────────────────────────────────
 
185
  def create_interface():
186
  with gr.Blocks(title="SDXL Dual-Shunt Tester", theme=gr.themes.Soft()) as demo:
187
  gr.Markdown("# 🧠 SDXL Dual-Shunt Tester")
 
189
  with gr.Row():
190
  with gr.Column(scale=1):
191
  gr.Markdown("### Prompts")
192
+ prompt = gr.Textbox(label="Prompt", lines=3)
193
+ negative = gr.Textbox(label="Negative", lines=2)
 
 
194
 
195
  gr.Markdown("### Adapters")
196
+ adapter_l = gr.Dropdown(clip_l_opts, value=clip_l_opts[1], label="CLIP-L Adapter")
197
+ adapter_g = gr.Dropdown(clip_g_opts, value=clip_g_opts[1], label="CLIP-G Adapter")
198
 
199
  gr.Markdown("### Adapter Controls")
200
  strength = gr.Slider(0, 10, 4.0, 0.05, label="Strength")
 
209
  with gr.Row():
210
  steps = gr.Slider(1, 50, 20, 1, label="Steps")
211
  cfg_scale = gr.Slider(1, 15, 7.5, 0.1, label="CFG")
212
+ scheduler = gr.Dropdown(list(SCHEDULERS.keys()), value="DPM++ 2M", label="Scheduler")
 
213
  with gr.Row():
214
  width = gr.Slider(512, 1536, 1024, 64, label="Width")
215
  height = gr.Slider(512, 1536, 1024, 64, label="Height")
 
227
  stats_l = gr.Textbox(label="Stats L", interactive=False)
228
  stats_g = gr.Textbox(label="Stats G", interactive=False)
229
230
  run_btn.click(
231
+ fn=infer,
232
  inputs=[prompt, negative, adapter_l, adapter_g, strength, delta_scale,
233
  sigma_scale, gpred_scale, noise, gate_prob, use_anchor, steps,
234
  cfg_scale, scheduler, width, height, seed],
 
236
  )
237
  return demo
238
 
 
239
  if __name__ == "__main__":
240
  create_interface().launch()
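
Because the commit turns the Space into a runnable local project, the rewritten app.py can be smoke-tested without going through the Gradio UI. The sketch below is a hypothetical local check, not part of the commit: the slider values are assumptions taken from the UI defaults where they are visible in the diff, and it expects the sibling modules (two_stream_shunt_adapter.py, conditioning_shifter.py, configs.py) plus a CUDA device for the fp16 pipeline.

# Hypothetical local smoke test for the rewritten app.py (values are assumptions).
from app import create_interface, infer

demo = create_interface()                     # builds the Blocks UI defined above
image, delta_l, gate_l, delta_g, gate_g, stats_l, stats_g = infer(
    "a futuristic control station with holographic displays",   # prompt
    "blurry, low quality",                                       # negative prompt
    "None", "None",             # adapter names; "None" skips both shunt adapters
    4.0, 1.0, 1.0, 1.0,         # strength, delta_scale, sigma_scale, gpred_scale (assumed)
    0.0, 1.0, True,             # noise, gate_prob, use_anchor (assumed)
    20, 7.5, "DPM++ 2M",        # steps, cfg_scale, scheduler
    1024, 1024, -1,             # width, height, seed (-1 → random generator)
)
image.save("smoke_test.png")
demo.launch()                   # equivalent to running `python app.py`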
conditioning_shifter.py CHANGED
@@ -4,7 +4,7 @@ import logging
4
  from typing import Dict, List, Tuple, Optional, Any
5
  from dataclasses import dataclass
6
 
7
- from . import ConditionModulationShuntAdapter, reshape_for_shunt
8
 
9
  logger = logging.getLogger(__name__)
10
 
 
4
  from typing import Dict, List, Tuple, Optional, Any
5
  from dataclasses import dataclass
6
 
7
+ from two_stream_shunt_adapter import ConditionModulationShuntAdapter, reshape_for_shunt
8
 
9
  logger = logging.getLogger(__name__)
10
 
configs.py CHANGED
@@ -801,3 +801,16 @@ class ShuntUtil:
801
  """
802
  return [shunt.name for shunt in SHUNT_DATAS]
803
801
  """
802
  return [shunt.name for shunt in SHUNT_DATAS]
803
 
804
+ @staticmethod
805
+ def get_shunts_by_clip_type(clip_type: str) -> list[ShuntData]:
806
+ """
807
+ Returns a list of shunts that match the given clip type.
808
+
809
+ Args:
810
+ clip_type (str): The type of clip to filter by (e.g., "clip_l", "clip_g").
811
+
812
+ Returns:
813
+ list[ShuntData]: List of shunts that match the clip type.
814
+ """
815
+ return [shunt for shunt in SHUNT_DATAS if any(mod["type"] == clip_type for mod in shunt.modulation_encoders)]
816
+
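
For context, a minimal sketch of how the new helper is consumed; it mirrors the calls made in the rewritten app.py above, and the ShuntData field names (name, repo, file, config) are taken from that usage rather than from configs.py itself.

# Hypothetical usage of the new ShuntUtil helpers, mirroring app.py.
from configs import ShuntUtil

clip_l_shunts = ShuntUtil.get_shunts_by_clip_type("clip_l")
print([s.name for s in clip_l_shunts])       # names shown in the CLIP-L dropdown

shunt = ShuntUtil.get_shunt_by_name(clip_l_shunts[0].name)
print(shunt.repo, shunt.file)                # repo/file that load_adapter_by_name downloads
print(type(shunt.config))                    # adapter config passed to TwoStreamShuntAdapter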
custom/__init__.py ADDED
File without changes
custom/t5_encoder_with_projection.py ADDED
@@ -0,0 +1,62 @@
1
+ import torch
2
+ from transformers import T5EncoderModel, T5Config, T5PreTrainedModel
3
+ from transformers.modeling_outputs import BaseModelOutput
4
+ from typing import List, Optional, Tuple, Union
5
+ from torch import nn, Tensor
6
+
7
+
8
+ class T5ProjectionConfig(T5Config):
9
+ def __init__(self, **kwargs):
10
+ super().__init__(**kwargs)
11
+ self.project_in_dim = kwargs.get("project_in_dim", 768)
12
+ self.project_out_dim = kwargs.get("out_dim", 4096)
13
+
14
+
15
+ class T5EncoderWithProjection(T5PreTrainedModel):
16
+ config_class = T5ProjectionConfig
17
+
18
+ def __init__(self, config):
19
+ super().__init__(config)
20
+ # self.encoder = encoder
21
+ self.encoder = T5EncoderModel(config)
22
+
23
+ self.final_projection = nn.Sequential(
24
+ nn.Linear(config.project_in_dim, config.project_out_dim, bias=False),
25
+ nn.ReLU(),
26
+ nn.Dropout(0.0),
27
+ nn.Linear(config.project_out_dim, config.project_out_dim, bias=False)
28
+ )
29
+
30
+ def forward(
31
+ self,
32
+ input_ids: Optional[torch.LongTensor] = None,
33
+ attention_mask: Optional[torch.FloatTensor] = None,
34
+ head_mask: Optional[torch.FloatTensor] = None,
35
+ inputs_embeds: Optional[torch.FloatTensor] = None,
36
+ output_attentions: Optional[bool] = None,
37
+ output_hidden_states: Optional[bool] = None,
38
+ return_dict: Optional[bool] = None,
39
+ ) -> Union[Tuple[torch.FloatTensor], BaseModelOutput]:
40
+
41
+ return_dict = return_dict if return_dict is not None else False
42
+
43
+ encoder_outputs = self.encoder(
44
+ input_ids=input_ids,
45
+ attention_mask=attention_mask,
46
+ inputs_embeds=inputs_embeds,
47
+ head_mask=head_mask,
48
+ output_attentions=output_attentions,
49
+ output_hidden_states=output_hidden_states,
50
+ return_dict=return_dict,
51
+ )
52
+ last_hidden_state = self.final_projection(encoder_outputs[0])
53
+ # last_hidden_state = self.final_block(last_hidden_state)[0]
54
+
55
+ if not return_dict:
56
+ return tuple(
57
+ v for v in [last_hidden_state] if v is not None
58
+ )
59
+
60
+ return BaseModelOutput(
61
+ last_hidden_state=last_hidden_state
62
+ )
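
A minimal, hypothetical shape check for the new wrapper follows. It builds a randomly initialised encoder (no pretrained weights) just to confirm the output width; note that the projection size is read from the "out_dim" kwarg and defaults to 4096, while project_in_dim defaults to 768.

# Hypothetical shape check for T5EncoderWithProjection (weights are random, not pretrained).
import torch
from transformers import AutoTokenizer
from custom.t5_encoder_with_projection import T5ProjectionConfig, T5EncoderWithProjection

cfg = T5ProjectionConfig.from_pretrained("google/flan-t5-base")   # project_in_dim=768, project_out_dim=4096 defaults
model = T5EncoderWithProjection(cfg).eval()

tok = AutoTokenizer.from_pretrained("google/flan-t5-base")
ids = tok("a test prompt", return_tensors="pt").input_ids
with torch.no_grad():
    (hidden,) = model(input_ids=ids)          # return_dict defaults to False → 1-tuple
print(hidden.shape)                            # torch.Size([1, seq_len, 4096])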
model_manager.py ADDED
@@ -0,0 +1,615 @@
1
+ from typing import Dict, Optional, Any, Union, Tuple
2
+ import os
3
+ import torch
4
+ import torch.nn as nn
5
+ import logging
6
+ from pathlib import Path
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+
10
+ from safetensors.torch import load_file
11
+ from torch.nn import Module
12
+ from transformers import AutoModel, AutoTokenizer, AutoConfig, AutoModelForSeq2SeqLM, BertModel, BertTokenizer, \
13
+ PreTrainedTokenizerFast, T5TokenizerFast, T5EncoderModel
14
+
15
+ from .custom.t5_encoder_with_projection import T5EncoderWithProjection
16
+
17
+ logger = logging.getLogger(__name__)
18
+ # --------------------------------------------------------------------------- #
19
+ # Helper for namespaced cache keys
20
+ def _make_key(model_type: str, model_id: str) -> str:
21
+ """
22
+ Produce a unique key for the internal cache.
23
+
24
+ Example
25
+ -------
26
+ >>> _make_key("bert", "bert-base")
27
+ 'bert:bert-base'
28
+ """
29
+ return f"{model_type}:{model_id}"
30
+
31
+
32
+ # Thread-safe registry wrapper
33
+ class _SafeDict(dict):
34
+ """A dict protected by a re-entrant lock for thread-safe writes."""
35
+ def __init__(self):
36
+ super().__init__()
37
+ import threading
38
+ self._lock = threading.RLock()
39
+
40
+ def safe_set(self, key, value):
41
+ with self._lock:
42
+ super().__setitem__(key, value)
43
+
44
+ def safe_get(self, key, default=None):
45
+ with self._lock:
46
+ return super().get(key, default)
47
+
48
+ def safe_del(self, key):
49
+ with self._lock:
50
+ if key in self:
51
+ super().__delitem__(key)
52
+ return True
53
+ return False
54
+
55
+
56
+ # -------------------------------------------------------------------------------------------------------------------- #
57
+ # WARNING: ENABLING THIS TRUST_REMOTE_CODE FLAG WILL ALLOW EXECUTION OF ARBITRARY CODE FROM THE MODEL REPOSITORY.
58
+ # USE WITH EXTREME CAUTION, AS IT CAN POTENTIALLY EXECUTE MALICIOUS CODE FROM UNTRUSTED SOURCES.
59
+
60
+ TRUST_REMOTE_CODE = False # Set to True only if you trust the source of the models you are loading.
61
+
62
+ # I advise leaving this OFF for any production or sensitive environments, and for any government or enterprise use.
63
+ # Ensure you fully trust the model repository and its maintainers, and review the code thoroughly.
64
+ # You cannot ONLY trust an AI's response to the question of whether it is safe to enable this flag,
65
+ # as it may not have the full context of security implications or the specific model's behavior.
66
+ # -------------------------------------------------------------------------------------------------------------------- #
67
+ # COMFYUI operates within a form of sandbox, but enabling remote code execution can still pose many unseen risks.
68
+ # -------------------------------------------------------------------------------------------------------------------- #
69
+
70
+
71
+ class ModelType(Enum):
72
+ """Enum for different model types"""
73
+ SHUNT_ADAPTER = "shunt_adapter"
74
+ T5_MODEL = "t5_model"
75
+ BERT_MODEL = "bert"
76
+ NOMIC_BERT_MODEL = "nomic_bert"
77
+ GENERIC = "generic"
78
+ TOKENIZER = "tokenizer"
79
+
80
+ @dataclass
81
+ class ModelInfo:
82
+ """Container for model information"""
83
+ model: nn.Module
84
+ model_type: ModelType
85
+ config: Dict[str, Any]
86
+ device: torch.device
87
+ dtype: torch.dtype
88
+ metadata: Dict[str, Any] = None
89
+ trust_remote_code: bool = TRUST_REMOTE_CODE # Use global setting by default
90
+
91
+
92
+ class ModelManager:
93
+ """
94
+ Centralized model loader / cache with thread-safety and namespaced keys.
95
+ """
96
+
97
+ def __init__(self, cache_dir: Optional[str] = None):
98
+ # Thread-safe model cache
99
+ self.models: _SafeDict = _SafeDict()
100
+
101
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
+ self.cache_dir = self._setup_cache_dir(cache_dir)
103
+
104
+ # be VERY careful with huggingface keys, remote code execution, and model downloads.
105
+ # If you are using private models or need to authenticate, set the HuggingFace API key.
106
+ def set_huggingface_key(self, key: str):
107
+ """
108
+ Set the HuggingFace API key for model downloads.
109
+ This is useful if you have a private model or need to authenticate.
110
+ """
111
+ os.environ["HF_TOKEN"] = key
112
+ logger.info("HuggingFace API key set successfully.")
113
+
114
+ def get_huggingface_key(self) -> Optional[str]:
115
+ """
116
+ Get the HuggingFace API key if set.
117
+ This is useful for debugging or checking if authentication is needed.
118
+ """
119
+ return os.environ.get("HF_TOKEN")
120
+
121
+ def set_huggingface_cache_directory(self, directory: str):
122
+ """
123
+ Set the cache directory for HuggingFace model downloads.
124
+ This is useful if you want to change the cache location.
125
+ This does not move existing models; it only changes the default download directory.
126
+ """
127
+ os.environ["HF_HOME"] = directory
128
+ logger.info(f"HuggingFace default directory set to: {directory}")
129
+
130
+ def get_huggingface_cache_directory(self) -> Optional[str]:
131
+ """
132
+ Get the cache directory for HuggingFace model downloads.
133
+ This is useful for debugging or checking where models are stored.
134
+ """
135
+ return os.environ.get("HF_HOME", str(self.cache_dir))
136
+
137
+ # --------------------------------------------------------------------- #
138
+ # Internal helpers
139
+ def _store(self, key: str, info: "ModelInfo") -> None:
140
+ """Thread-safe insertion into the model cache."""
141
+ self.models.safe_set(key, info)
142
+
143
+
144
+ def _setup_cache_dir(self, cache_dir: Optional[str]) -> Path:
145
+ """Setup and validate cache directory"""
146
+ if cache_dir:
147
+ cache_path = Path(cache_dir)
148
+ else:
149
+ # Use default HuggingFace cache location
150
+ cache_path = Path.home() / ".cache" / "huggingface" / "transformers"
151
+
152
+ cache_path.mkdir(parents=True, exist_ok=True)
153
+ logger.info(f"Using cache directory: {cache_path}")
154
+ return cache_path
155
+
156
+ def get_model(self, key: str) -> Optional["ModelInfo"]:
157
+ """Retrieve a model by its namespaced key."""
158
+ return self.models.safe_get(key)
159
+
160
+ def is_loaded(self, key: str) -> bool:
161
+ """Return True if the namespaced key is present in the cache."""
162
+ return self.models.safe_get(key) is not None
163
+
164
+
165
+ def move_model(
166
+ self,
167
+ namespaced_key: str,
168
+ *,
169
+ device: Optional[torch.device] = None,
170
+ dtype: Optional[torch.dtype] = None,
171
+ ) -> Optional[nn.Module]:
172
+ """
173
+ Convert device/dtype of a cached model and return the updated object.
174
+ """
175
+ model = self._maybe_convert_dtype(namespaced_key, dtype, device)
176
+ if model is None:
177
+ logger.warning("move_model: %s not found", namespaced_key)
178
+ return model
179
+
180
+
181
+ def load_tokenizer(
182
+ self,
183
+ id: str,
184
+ tokenizer_name_or_path: str,
185
+ target_output_device: Optional[torch.device] = None,
186
+ force_reload: bool = False,
187
+ trust_remote_code: Optional[bool] = None,
188
+ ) -> Optional[tuple[PreTrainedTokenizerFast, dict[str, Any]]]:
189
+ """Load or fetch from cache a Hugging-Face tokenizer."""
190
+ key = _make_key("tokenizer", id)
191
+ if not force_reload and self.is_loaded(key):
192
+ model_info = self.get_model(key)
193
+ return model_info.model, model_info.metadata
194
+
195
+ try:
196
+ trust_remote_code = (
197
+ trust_remote_code if trust_remote_code is not None else TRUST_REMOTE_CODE
198
+ )
199
+ tok = AutoTokenizer.from_pretrained(
200
+ tokenizer_name_or_path, trust_remote_code=trust_remote_code
201
+ )
202
+
203
+ self._store(
204
+ key,
205
+ ModelInfo(
206
+ model=tok,
207
+ model_type=ModelType.TOKENIZER,
208
+ config={"tokenizer_name": tokenizer_name_or_path},
209
+ device=target_output_device or torch.device("cpu"),
210
+ dtype=torch.float32,
211
+ metadata={"source": "huggingface", "trust_remote_code": trust_remote_code},
212
+ ),
213
+ )
214
+ logger.info("Loaded tokenizer %s", key)
215
+ return tok, self.get_model(key).metadata
216
+
217
+ except Exception:
218
+ logger.exception("Failed to load tokenizer %s", id)
219
+ return None
220
+
221
+
222
+ def load_shunt_adapter(
223
+ self,
224
+ adapter_id: str,
225
+ config: Dict[str, Any],
226
+ path: Optional[str] = None,
227
+ repo_id: Optional[str] = None,
228
+ filename: Optional[str] = None,
229
+ device: Optional[torch.device] = None,
230
+ dtype: Optional[torch.dtype] = None,
231
+ force_reload: bool = False
232
+ ) -> Optional[nn.Module]:
233
+ """
234
+ Load a shunt adapter from local path or HuggingFace.
235
+
236
+ Args:
237
+ adapter_id: Unique identifier for the adapter
238
+ config: Configuration dictionary for the adapter
239
+ path: Local path to the adapter file
240
+ repo_id: HuggingFace repository ID
241
+ filename: Filename in the HuggingFace repository
242
+ device: Target device
243
+ dtype: Target dtype
244
+ force_reload: Force reload even if cached
245
+
246
+ Returns:
247
+ Loaded adapter model or None if failed
248
+ """
249
+ if not force_reload and self.is_loaded(adapter_id):
250
+ logger.info(f"Using cached adapter: {adapter_id}")
251
+ return self._maybe_convert_dtype(adapter_id, dtype, device)
252
+ try:
253
+ # Import here to avoid circular imports
254
+ from two_stream_shunt_adapter import ConditionModulationShuntAdapter
255
+
256
+ # Determine file location
257
+ file_path = self._resolve_file_path(path, repo_id, filename)
258
+ if not file_path:
259
+ raise FileNotFoundError(f"Could not find adapter file for {adapter_id}")
260
+ # Initialize adapter
261
+ # NOTE: head-count selection is not implemented here. t5-vit-l-14-dual_shunt_booru_13_000_000.safetensors expects 4 attention heads, the others 12, so the passed config must already specify this.
262
+ logger.info(f"Loading adapter {adapter_id} from {file_path}")
263
+ adapter = ConditionModulationShuntAdapter(config=config)
264
+ logger.info(f"Initialized adapter {adapter_id} with config: {config}")
265
+ # Load weights
266
+ state_dict = load_file(file_path)
267
+ logger.info(f"Loaded state_dict for adapter {adapter_id} from {file_path}")
268
+ adapter.load_state_dict(state_dict, strict=False)
269
+ logger.info(f"Adapter {adapter_id} state_dict loaded successfully")
270
+
271
+ # Move to device and dtype
272
+ device = device or self.device
273
+ dtype = dtype or torch.float32
274
+ logger.info(f"Moving adapter {adapter_id} to device: {device}, dtype: {dtype}")
275
+ adapter = adapter.to(device=device, dtype=dtype)
276
+ logger.info(f"Adapter {adapter_id} moved to device and dtype successfully")
277
+
278
+ # Cache the model
279
+ self.models[adapter_id] = ModelInfo(
280
+ model=adapter,
281
+ model_type=ModelType.SHUNT_ADAPTER,
282
+ config=config,
283
+ device=device,
284
+ dtype=dtype,
285
+ metadata={"file_path": str(file_path)}
286
+ )
287
+ logger.info(f"Adapter {adapter_id} cached successfully")
288
+
289
+ logger.info(f"Successfully loaded adapter: {adapter_id}")
290
+ return adapter
291
+
292
+ except Exception as e:
293
+ logger.error(f"Failed to load adapter {adapter_id} from {path or repo_id}/{filename}: {e}")
294
+ logger.debug(f"Traceback: {e.__traceback__}")
295
+ return None
296
+
297
+ def load_encoder_model(self,
298
+ model_type: str, # use this to see if it's compatible with the current model manager
299
+ model_id: str,
300
+ model_name_or_path: str,
301
+ device: Optional[torch.device] = None,
302
+ dtype: Optional[torch.dtype] = None,
303
+ force_reload: bool = False,
304
+ trust_remote_code: Optional[bool] = None, # Overrides the global TRUST_REMOTE_CODE setting.
305
+ config: Optional[Dict[str, Any]] = None # Additional configuration for the model
306
+ ) -> Optional[nn.Module]:
307
+ """
308
+ Load an encoder model (e.g., BERT, T5) and return it.
309
+
310
+ Args:
311
+ model_type: Type of the model (e.g., "bert", "t5")
312
+ model_id: Unique identifier for the model
313
+ model_name_or_path: Model name or path
314
+ device: Target device
315
+ dtype: Target dtype
316
+ force_reload: Force reload even if cached
317
+
318
+ Returns:
319
+ Loaded model or None if failed
320
+ """
321
+ if model_type == "bert":
322
+ return self.load_bert_model(model_id, model_name_or_path, device, dtype, force_reload, trust_remote_code)
323
+ elif model_type == "nomic_bert":
324
+ # Nomic BERT is a specific variant of BERT, so we can use the same loading function
325
+ return self.load_bert_model(model_id, model_name_or_path, device, dtype, force_reload, trust_remote_code)
326
+ elif "t5" in model_type:
327
+ return self.load_t5_model(model_id, model_name_or_path, device, dtype, force_reload, trust_remote_code, config)
328
+ else:
329
+ logger.error(f"Unsupported model type: {model_type}")
330
+ return None
331
+
332
+ def load_bert_model(
333
+ self,
334
+ model_id: str,
335
+ model_name_or_path: str,
336
+ device: Optional[torch.device] = None,
337
+ dtype: Optional[torch.dtype] = None,
338
+ force_reload: bool = False,
339
+ trust_remote_code: Optional[bool] = None # Overrides the global TRUST_REMOTE_CODE setting.
340
+ ) -> Optional[Tuple[nn.Module, Any]]:
341
+
342
+ """
343
+ Load a BERT model and tokenizer.
344
+
345
+ Returns:
346
+ Tuple of (model, tokenizer) or None if failed
347
+ """
348
+ if not force_reload and self.is_loaded(model_id):
349
+ logger.info(f"Using cached BERT model: {model_id}")
350
+ model_info = self.get_model(model_id)
351
+ return model_info.model, model_info.metadata.get("tokenizer")
352
+
353
+ try:
354
+ device = device or self.device
355
+ dtype = dtype or torch.float32
356
+
357
+ config = AutoConfig.from_pretrained(
358
+ model_name_or_path,
359
+ trust_remote_code=trust_remote_code if trust_remote_code is not None else TRUST_REMOTE_CODE # Use the global flag for remote code execution
360
+ )
361
+
362
+ # Load tokenizer and model
363
+ tokenizer = AutoTokenizer.from_pretrained(
364
+ model_name_or_path,
365
+ config=config,
366
+ use_special_tokens=True, # Ensure special tokens are used
367
+ trust_remote_code=trust_remote_code if trust_remote_code is not None else TRUST_REMOTE_CODE # Use the global flag for remote code execution
368
+ )
369
+ model = AutoModel.from_pretrained(
370
+ model_name_or_path,
371
+ config=config,
372
+ torch_dtype=dtype,
373
+ trust_remote_code=trust_remote_code if trust_remote_code is not None else TRUST_REMOTE_CODE # Use the global flag for remote code execution
374
+ ).to(device)
375
+
376
+ # Cache the model
377
+
378
+ self._store(_make_key("bert", model_id), ModelInfo(
379
+ model=model,
380
+ model_type=ModelType.BERT_MODEL,
381
+ config={"model_name": model_name_or_path},
382
+ device=device,
383
+ dtype=dtype,
384
+ metadata={"tokenizer": tokenizer},
385
+ trust_remote_code=trust_remote_code if trust_remote_code is not None else TRUST_REMOTE_CODE
386
+ ))
387
+
388
+ logger.info(f"Successfully loaded BERT model: {model_id}")
389
+ return model, tokenizer
390
+
391
+ except Exception as e:
392
+ logger.error(f"Failed to load BERT model {model_id}: {e}")
393
+ return None
394
+
395
+ def load_t5_model(
396
+ self,
397
+ model_id: str,
398
+ model_name_or_path: str,
399
+ device: Optional[torch.device] = None,
400
+ dtype: Optional[torch.dtype] = None,
401
+ force_reload: bool = False,
402
+ override_remote_code: Optional[bool] = None, # Overrides the global TRUST_REMOTE_CODE setting.
403
+ config: Optional[Dict[str, Any]] = None # Additional configuration for the model
404
+ ) -> Optional[Tuple[nn.Module, Any]]:
405
+ """
406
+ Load a T5 model and tokenizer.
407
+
408
+ Returns:
409
+ Tuple of (model, tokenizer) or None if failed
410
+ """
411
+ if not force_reload and self.is_loaded(model_id):
412
+ logger.info(f"Using cached T5 model: {model_id}")
413
+ model_info = self.get_model(model_id)
414
+ return model_info.model, model_info.metadata.get("tokenizer")
415
+
416
+ try:
417
+ device = device or self.device
418
+ dtype = dtype or torch.float32
419
+ trust_remote_code = override_remote_code if override_remote_code is not None else TRUST_REMOTE_CODE
420
+ # Load tokenizer and model
421
+ if config.get("type", "t5") == "t5":
422
+ tokenizer = AutoTokenizer.from_pretrained(
423
+ "google/flan-t5-base",
424
+ trust_remote_code=trust_remote_code # Use the global flag for remote code execution
425
+ )
426
+ elif config.get("type", "t5") == "t5_unchained":
427
+ tokenizer = T5TokenizerFast.from_pretrained(
428
+ "AbstractPhil/t5xxl-unchained",
429
+ trust_remote_code=trust_remote_code # Use the global flag for remote code execution
430
+ )
431
+ else:
432
+ tokenizer = T5TokenizerFast.from_pretrained(
433
+ "google/flan-t5-base",
434
+ trust_remote_code=trust_remote_code # Use the global flag for remote code execution
435
+ )
436
+
437
+ if config.get("type", "t5") == "t5":
438
+ logger.info(f"Loading T5ForConditionalGeneration model from {model_name_or_path}")
439
+ model = AutoModelForSeq2SeqLM.from_pretrained(
440
+ model_name_or_path,
441
+ torch_dtype=dtype,
442
+ trust_remote_code=trust_remote_code # Use the global flag for remote code execution
443
+ ).to(device)
444
+ elif config.get("type", "t5") == "t5_encoder_with_projection":
445
+ # Load T5EncoderModel with projection layer
446
+ logger.info(f"Loading T5EncoderWithProjection model from {model_name_or_path}")
447
+ model = T5EncoderWithProjection.from_pretrained(
448
+ model_name_or_path,
449
+ torch_dtype=dtype,
450
+ trust_remote_code=trust_remote_code # Use the global flag for remote code execution
451
+ ).to(device)
452
+
453
+ else:
454
+ # Load standard T5 model
455
+ logger.info(f"Loading T5EncoderModel from {model_name_or_path}")
456
+ model = AutoModel.from_pretrained(
457
+ model_name_or_path,
458
+ torch_dtype=dtype,
459
+ trust_remote_code=trust_remote_code # Use the global flag for remote code execution
460
+ ).to(device)
461
+
462
+ # Cache the model
463
+ self._store(_make_key("t5", model_id), ModelInfo(
464
+ model=model,
465
+ model_type=ModelType.T5_MODEL,
466
+ config={"model_name": model_name_or_path},
467
+ device=device,
468
+ dtype=dtype,
469
+ metadata={"tokenizer": tokenizer}
470
+ ))
471
+
472
+ logger.info(f"Successfully loaded T5 model: {model_id}")
473
+ return model, tokenizer
474
+
475
+ except Exception as e:
476
+ logger.error(f"Failed to load T5 model {model_id}: {e}")
477
+ return None
478
+
479
+ def unload_model(self, model_id: str) -> bool:
480
+ """
481
+ Unload a model to free memory.
482
+
483
+ Returns:
484
+ True if successfully unloaded, False otherwise
485
+ """
486
+ if model_id in self.models:
487
+ try:
488
+ # Move to CPU first to free GPU memory
489
+ model_info = self.models[model_id]
490
+ model_info.model.cpu()
491
+
492
+ # Delete the model
493
+ del self.models[model_id]
494
+
495
+ # Force garbage collection
496
+ import gc
497
+ gc.collect()
498
+ if torch.cuda.is_available():
499
+ torch.cuda.empty_cache()
500
+
501
+ logger.info(f"Successfully unloaded model: {model_id}")
502
+ return True
503
+
504
+ except Exception as e:
505
+ logger.error(f"Failed to unload model {model_id}: {e}")
506
+ return False
507
+ else:
508
+ logger.warning(f"Model {model_id} not found in cache")
509
+ return False
510
+
511
+ def list_models(self) -> Dict[str, Dict[str, Any]]:
512
+ """List all loaded models with their information"""
513
+ return {
514
+ model_id: {
515
+ "type": info.model_type.value,
516
+ "device": str(info.device),
517
+ "dtype": str(info.dtype),
518
+ "config": info.config
519
+ }
520
+ for model_id, info in self.models.items()
521
+ }
522
+
523
+ def clear_all(self):
524
+ """Clear all loaded models"""
525
+ model_ids = list(self.models.keys())
526
+ for model_id in model_ids:
527
+ self.unload_model(model_id)
528
+ logger.info("All models cleared from memory")
529
+
530
+ def _resolve_file_path(
531
+ self,
532
+ local_path: Optional[str],
533
+ repo_id: Optional[str],
534
+ filename: Optional[str]
535
+ ) -> Optional[Path]:
536
+ """Resolve file path from local or HuggingFace"""
537
+ # Try local path first
538
+ if local_path and os.path.exists(local_path):
539
+ return Path(local_path)
540
+
541
+ # Try HuggingFace
542
+ if repo_id and filename:
543
+ try:
544
+ from huggingface_hub import hf_hub_download
545
+
546
+ file_path = hf_hub_download(
547
+ repo_id=repo_id,
548
+ filename=filename,
549
+ cache_dir=str(self.cache_dir),
550
+ repo_type="model"
551
+ )
552
+ return Path(file_path)
553
+
554
+ except Exception as e:
555
+ logger.error(f"Failed to download from HuggingFace: {e}")
556
+
557
+ return None
558
+
559
+ def _maybe_convert_dtype(
560
+ self,
561
+ model_id: str,
562
+ target_dtype: Optional[torch.dtype],
563
+ target_device: Optional[torch.device]
564
+ ) -> Optional[nn.Module]:
565
+ """Convert model dtype/device if needed"""
566
+ model_info = self.get_model(model_id)
567
+ if not model_info:
568
+ return None
569
+
570
+ model = model_info.model
571
+ changed = False
572
+
573
+ # Check dtype conversion
574
+ if target_dtype and model_info.dtype != target_dtype:
575
+ try:
576
+ model = model.to(dtype=target_dtype)
577
+ model_info.dtype = target_dtype
578
+ changed = True
579
+ logger.info(f"Converted {model_id} to dtype: {target_dtype}")
580
+ except Exception as e:
581
+ logger.error(f"Failed to convert dtype for {model_id}: {e}")
582
+
583
+ # Check device conversion
584
+ if target_device and model_info.device != target_device:
585
+ try:
586
+ model = model.to(device=target_device)
587
+ model_info.device = target_device
588
+ changed = True
589
+ logger.info(f"Moved {model_id} to device: {target_device}")
590
+ except Exception as e:
591
+ logger.error(f"Failed to move {model_id} to device: {e}")
592
+
593
+ if changed:
594
+ model_info.model = model
595
+
596
+ return model
597
+
598
+
599
+ def __del__(self):
600
+ """Cleanup on deletion"""
601
+ self.clear_all()
602
+
603
+
604
+ # Global instance (singleton pattern)
605
+ _global_model_manager: Optional[ModelManager] = None
606
+
607
+
608
+ def get_model_manager(cache_dir: Optional[str] = None) -> ModelManager:
609
+ """Get or create the global model manager instance"""
610
+ global _global_model_manager
611
+
612
+ if _global_model_manager is None:
613
+ _global_model_manager = ModelManager(cache_dir=cache_dir)
614
+
615
+ return _global_model_manager
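
Finally, a rough usage sketch for the new ModelManager. The identifiers below are placeholders; also note that model_manager.py imports custom.t5_encoder_with_projection relatively, so depending on how the project is laid out the import below may need to go through the enclosing package instead of the loose module.

# Hypothetical usage of the ModelManager singleton (ids/paths are placeholders).
from model_manager import get_model_manager

mm = get_model_manager()                       # creates the global instance on first call

loaded = mm.load_t5_model(
    model_id="flan-t5-base",
    model_name_or_path="google/flan-t5-base",
    config={"type": "t5"},                     # required: load_t5_model reads config["type"]
)
if loaded is not None:
    model, tokenizer = loaded
    print(mm.list_models())                    # cached under the namespaced key "t5:flan-t5-base"
    mm.unload_model("t5:flan-t5-base")         # moves to CPU, drops the cache entry, frees GPU memory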
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
  sentencepiece
2
  accelerate
3
  diffusers
4
- invisible_watermark
5
  torch
6
  transformers
7
  xformers
8
- matplotlib
 
 
1
  sentencepiece
2
  accelerate
3
  diffusers
 
4
  torch
5
  transformers
6
  xformers
7
+ matplotlib
8
+ gradio
two_stream_shunt_adapter.py CHANGED
@@ -2,7 +2,6 @@ from typing import Tuple
2
 
3
  import torch
4
  import torch.nn as nn
5
- from . import ENCODER_CONFIGS, HARMONIC_SHUNT_REPOS
6
 
7
 
8
  class DualConversionNames:
 
2
 
3
  import torch
4
  import torch.nn as nn
 
5
 
6
 
7
  class DualConversionNames: