Spaces: Running on Zero
Commit · ce712b4 · 1 Parent(s): cae6d82
yes

app.py CHANGED
@@ -107,11 +107,11 @@ def encode_sdxl_prompt(prompt, negative_prompt=""):
     ).input_ids.to(device)

     with torch.no_grad():
-        # CLIP-L embeddings (768d)
+        # CLIP-L embeddings (768d) - [0] is sequence, [1] is pooled
         clip_l_embeds = pipe.text_encoder(tokens_l)[0]
         neg_clip_l_embeds = pipe.text_encoder(neg_tokens_l)[0]

-        # CLIP-G embeddings (1280d) -
+        # CLIP-G embeddings (1280d) - [0] is sequence, [1] is pooled
         clip_g_embeds = pipe.text_encoder_2(tokens_g)[0]
         neg_clip_g_embeds = pipe.text_encoder_2(neg_tokens_g)[0]

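A note on the new comments: in current transformers, the two SDXL text encoders are different classes with different output orderings, so what `[0]` returns is worth verifying rather than assuming. `CLIPTextModel` (CLIP-L) returns (last_hidden_state, pooler_output), while `CLIPTextModelWithProjection` (CLIP-G) returns (text_embeds, last_hidden_state), in which case `[0]` on `text_encoder_2` would be the pooled 1280-d projection, not the token sequence. A minimal sanity check, assuming `pipe`, `tokens_l`, and `tokens_g` as defined in the surrounding function (not part of the commit):

import torch

# Inspect what index [0] actually holds for each encoder. If text_encoder_2
# is the usual CLIPTextModelWithProjection, out_g[0] may be pooled, not sequence.
with torch.no_grad():
    out_l = pipe.text_encoder(tokens_l)
    out_g = pipe.text_encoder_2(tokens_g)
print(type(out_l).__name__, tuple(out_l[0].shape))  # sequence: (batch, 77, 768)
print(type(out_g).__name__, tuple(out_g[0].shape))  # (batch, 1280) would mean pooled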
@@ -142,30 +142,28 @@ def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noi
     if scheduler_name in SCHEDULERS:
         pipe.scheduler = SCHEDULERS[scheduler_name].from_config(pipe.scheduler.config)

-    # Get T5 embeddings for semantic understanding
-    t5_ids = t5_tok(
+    # Get T5 embeddings for semantic understanding - standardize to 77 tokens like CLIP
+    t5_ids = t5_tok(
+        prompt,
+        return_tensors="pt",
+        padding="max_length",
+        max_length=77,
+        truncation=True
+    ).input_ids.to(device)
     t5_seq = t5_mod(t5_ids).last_hidden_state

     # Get proper SDXL CLIP embeddings
     clip_embeds = encode_sdxl_prompt(prompt, negative_prompt)

+    # Debug shapes
+    print(f"T5 seq shape: {t5_seq.shape}")
+    print(f"CLIP-L shape: {clip_embeds['clip_l'].shape}")
+    print(f"CLIP-G shape: {clip_embeds['clip_g'].shape}")
+
    # Load adapters
     adapter_l = load_adapter(repo_l, adapter_l_file, config_l) if adapter_l_file else None
     adapter_g = load_adapter(repo_g, adapter_g_file, config_g) if adapter_g_file else None

-    # Ensure all embeddings have the same sequence length (77 tokens)
-    seq_len = 77
-
-    # Resize T5 to match CLIP sequence length
-    if t5_seq.size(1) != seq_len:
-        t5_seq = torch.nn.functional.interpolate(
-            t5_seq.transpose(1, 2),
-            size=seq_len,
-            mode="nearest"
-        ).transpose(1, 2)
-
-    print(f"After resize - T5: {t5_seq.shape}, CLIP-L: {clip_embeds['clip_l'].shape}, CLIP-G: {clip_embeds['clip_g'].shape}")
-
     # Apply CLIP-L adapter
     if adapter_l is not None:
         anchor_l, delta_l, log_sigma_l, attn_l1, attn_l2, tau_l, g_pred_l, gate_l = adapter_l(t5_seq, clip_embeds["clip_l"])
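This hunk pads and truncates the T5 prompt to the same 77-token length CLIP uses, so the adapters see aligned sequence lengths and the nearest-neighbor interpolation block deleted above is no longer needed. A self-contained sketch of the same padding behavior, using google/flan-t5-base as a stand-in for whatever checkpoint t5_tok and t5_mod actually wrap:

import torch
from transformers import AutoTokenizer, T5EncoderModel

tok = AutoTokenizer.from_pretrained("google/flan-t5-base")
enc = T5EncoderModel.from_pretrained("google/flan-t5-base")

ids = tok(
    "a photo of a cat",
    return_tensors="pt",
    padding="max_length",   # always pad out to max_length
    max_length=77,          # match CLIP's 77-token context
    truncation=True,        # clip longer prompts instead of overflowing
).input_ids

with torch.no_grad():
    seq = enc(ids).last_hidden_state
print(seq.shape)  # torch.Size([1, 77, 768]) for this checkpoint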
@@ -193,23 +191,6 @@ def infer(prompt, negative_prompt, adapter_l_file, adapter_g_file, strength, noi
             clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
         if noise > 0:
             clip_g_mod += torch.randn_like(clip_g_mod) * noise
-    else:
-        clip_g_mod = clip_embeds["clip_g"]
-        delta_g_final = torch.zeros_like(clip_embeds["clip_g"])
-        gate_g_scaled = torch.zeros_like(clip_embeds["clip_g"])
-        g_pred_g = torch.tensor(0.0)
-        tau_g = torch.tensor(0.0) 2)
-    else:
-        t5_seq_resized = t5_seq
-
-        anchor_g, delta_g, log_sigma_g, attn_g1, attn_g2, tau_g, g_pred_g, gate_g = adapter_g(t5_seq_resized, clip_embeds["clip_g"])
-        gate_g_scaled = gate_g * gate_prob
-        delta_g_final = delta_g * strength * gate_g_scaled
-        clip_g_mod = clip_embeds["clip_g"] + delta_g_final
-        if use_anchor:
-            clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
-        if noise > 0:
-            clip_g_mod += torch.randn_like(clip_g_mod) * noise
     else:
         clip_g_mod = clip_embeds["clip_g"]
         delta_g_final = torch.zeros_like(clip_embeds["clip_g"])
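The block removed here looks like a stale duplicate of the CLIP-G branch (note the two consecutive else: clauses and the stray "2)" remnant, which would not parse); the commit keeps a single fallback that passes the base embedding through. The surviving math is a gated residual update plus an optional convex blend toward an anchor. A minimal illustration with assumed shapes, where strength, gate_prob, use_anchor, and noise mirror the app's UI parameters:

import torch

clip_g = torch.randn(1, 77, 1280)      # base CLIP-G sequence embedding
delta_g = torch.randn_like(clip_g)     # adapter-predicted correction
anchor_g = torch.randn_like(clip_g)    # adapter-predicted anchor embedding
gate_g = torch.rand_like(clip_g)       # per-element gate in [0, 1]

strength, gate_prob, use_anchor, noise = 1.0, 0.5, True, 0.0

gate_g_scaled = gate_g * gate_prob
delta_g_final = delta_g * strength * gate_g_scaled
clip_g_mod = clip_g + delta_g_final                 # gated residual update
if use_anchor:
    # element-wise lerp toward the anchor, weighted by the scaled gate
    clip_g_mod = clip_g_mod * (1 - gate_g_scaled) + anchor_g * gate_g_scaled
if noise > 0:
    clip_g_mod += torch.randn_like(clip_g_mod) * noise
print(clip_g_mod.shape)  # torch.Size([1, 77, 1280])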