Chintan-Shah committed on
Commit
b29643b
·
verified ·
1 Parent(s): 0dea9c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -81,13 +81,13 @@ def latent_loss(latent, conditioning_image):
81
  def generate_with_embs(text_input, text_embeddings, conditioning_image):
82
  height = 512 # default height of Stable Diffusion
83
  width = 512 # default width of Stable Diffusion
84
- num_inference_steps = 30 # Number of denoising steps
85
  guidance_scale = 7.5 # Scale for classifier-free guidance
86
 
87
  generator = torch.manual_seed(torch.seed()) # Seed generator to create the inital latent noise
88
  batch_size = 1
89
  loss_scale = 100 #@param
90
- imageCondSteps = 5
91
 
92
  max_length = text_input.input_ids.shape[-1]
93
  uncond_input = tokenizer(
@@ -140,7 +140,7 @@ def generate_with_embs(text_input, text_embeddings, conditioning_image):
140
  loss = latent_loss(latents_x0, conditioning_image) * loss_scale
141
  # loss = blue_loss(denoised_images) * blue_loss_scale
142
 
143
- print(i, 'loss item:', loss.item())
144
 
145
  # Get gradient
146
  cond_grad = torch.autograd.grad(loss, latents)[0]
@@ -169,12 +169,12 @@ def getImageWithStyle(prompt, style_name, conditioning_image):
169
  # print("text_input:", text_input)
170
  input_ids = text_input.input_ids.to(torch_device)
171
  # print("Input Ids:", input_ids)
172
- print("Input Ids shape:", input_ids.shape)
173
 
174
  token_emb_layer = text_encoder.text_model.embeddings.token_embedding
175
  # Get token embeddings
176
  token_embeddings = token_emb_layer(input_ids)
177
- print("Token Embeddings shape:", token_embeddings.shape)
178
 
179
  # The new embedding - our special style word
180
  replacement_token_embedding = style_embed[new_style].to(torch_device)
@@ -184,9 +184,9 @@ def getImageWithStyle(prompt, style_name, conditioning_image):
184
 
185
  pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
186
  position_ids = text_encoder.text_model.embeddings.position_ids
187
- print("position_ids shape:", position_ids.shape)
188
  position_embeddings = pos_emb_layer(position_ids)
189
- print("Position Embeddings shape:", token_embeddings.shape)
190
 
191
  # Combine with pos embs
192
  input_embeddings = token_embeddings + position_embeddings
@@ -257,7 +257,7 @@ def generateOutput(prompt, style_name, conditioning_image):
257
 
258
  title = "Stable Diffusion SD Styles along with image conditioning"
259
  description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
260
- examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'],["A playground", 'lineart', None]]
261
  style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
262
  demo = gr.Interface(
263
  generateOutput,
@@ -267,7 +267,7 @@ demo = gr.Interface(
267
  gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
268
  ],
269
  outputs = [
270
- gr.Image(width=256, height=256, label="Output"),
271
  ],
272
  title = title,
273
  description = description,
 
81
  def generate_with_embs(text_input, text_embeddings, conditioning_image):
82
  height = 512 # default height of Stable Diffusion
83
  width = 512 # default width of Stable Diffusion
84
+ num_inference_steps = 10 # Number of denoising steps
85
  guidance_scale = 7.5 # Scale for classifier-free guidance
86
 
87
  generator = torch.manual_seed(torch.seed()) # Seed generator to create the inital latent noise
88
  batch_size = 1
89
  loss_scale = 100 #@param
90
+ imageCondSteps = 2
91
 
92
  max_length = text_input.input_ids.shape[-1]
93
  uncond_input = tokenizer(
 
140
  loss = latent_loss(latents_x0, conditioning_image) * loss_scale
141
  # loss = blue_loss(denoised_images) * blue_loss_scale
142
 
143
+ # print(i, 'loss item:', loss.item())
144
 
145
  # Get gradient
146
  cond_grad = torch.autograd.grad(loss, latents)[0]
 
169
  # print("text_input:", text_input)
170
  input_ids = text_input.input_ids.to(torch_device)
171
  # print("Input Ids:", input_ids)
172
+ # print("Input Ids shape:", input_ids.shape)
173
 
174
  token_emb_layer = text_encoder.text_model.embeddings.token_embedding
175
  # Get token embeddings
176
  token_embeddings = token_emb_layer(input_ids)
177
+ # print("Token Embeddings shape:", token_embeddings.shape)
178
 
179
  # The new embedding - our special style word
180
  replacement_token_embedding = style_embed[new_style].to(torch_device)
 
184
 
185
  pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
186
  position_ids = text_encoder.text_model.embeddings.position_ids
187
+ # print("position_ids shape:", position_ids.shape)
188
  position_embeddings = pos_emb_layer(position_ids)
189
+ # print("Position Embeddings shape:", token_embeddings.shape)
190
 
191
  # Combine with pos embs
192
  input_embeddings = token_embeddings + position_embeddings
 
257
 
258
  title = "Stable Diffusion SD Styles along with image conditioning"
259
  description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
260
+ examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'],["A playground", 'lineart', None],["A theme park", 'cute-game-style', 'conditioning_images/vividcolors.jpg'],["A mouse", 'depthmap', 'conditioning_images/autumn.jpg']]
261
  style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
262
  demo = gr.Interface(
263
  generateOutput,
 
267
  gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
268
  ],
269
  outputs = [
270
+ gr.Image(width=512, height=512, label="Output"),
271
  ],
272
  title = title,
273
  description = description,