Chintan-Shah committed on
Commit
b29643b
·
verified ·
1 Parent(s): 0dea9c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -81,13 +81,13 @@ def latent_loss(latent, conditioning_image):
81
  def generate_with_embs(text_input, text_embeddings, conditioning_image):
82
  height = 512 # default height of Stable Diffusion
83
  width = 512 # default width of Stable Diffusion
84
- num_inference_steps = 30 # Number of denoising steps
85
  guidance_scale = 7.5 # Scale for classifier-free guidance
86
 
87
  generator = torch.manual_seed(torch.seed()) # Seed generator to create the inital latent noise
88
  batch_size = 1
89
  loss_scale = 100 #@param
90
- imageCondSteps = 5
91
 
92
  max_length = text_input.input_ids.shape[-1]
93
  uncond_input = tokenizer(
@@ -140,7 +140,7 @@ def generate_with_embs(text_input, text_embeddings, conditioning_image):
140
  loss = latent_loss(latents_x0, conditioning_image) * loss_scale
141
  # loss = blue_loss(denoised_images) * blue_loss_scale
142
 
143
- print(i, 'loss item:', loss.item())
144
 
145
  # Get gradient
146
  cond_grad = torch.autograd.grad(loss, latents)[0]
@@ -169,12 +169,12 @@ def getImageWithStyle(prompt, style_name, conditioning_image):
169
  # print("text_input:", text_input)
170
  input_ids = text_input.input_ids.to(torch_device)
171
  # print("Input Ids:", input_ids)
172
- print("Input Ids shape:", input_ids.shape)
173
 
174
  token_emb_layer = text_encoder.text_model.embeddings.token_embedding
175
  # Get token embeddings
176
  token_embeddings = token_emb_layer(input_ids)
177
- print("Token Embeddings shape:", token_embeddings.shape)
178
 
179
  # The new embedding - our special style word
180
  replacement_token_embedding = style_embed[new_style].to(torch_device)
@@ -184,9 +184,9 @@ def getImageWithStyle(prompt, style_name, conditioning_image):
184
 
185
  pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
186
  position_ids = text_encoder.text_model.embeddings.position_ids
187
- print("position_ids shape:", position_ids.shape)
188
  position_embeddings = pos_emb_layer(position_ids)
189
- print("Position Embeddings shape:", token_embeddings.shape)
190
 
191
  # Combine with pos embs
192
  input_embeddings = token_embeddings + position_embeddings
@@ -257,7 +257,7 @@ def generateOutput(prompt, style_name, conditioning_image):
257
 
258
  title = "Stable Diffusion SD Styles along with image conditioning"
259
  description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
260
- examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'],["A playground", 'lineart', None]]
261
  style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
262
  demo = gr.Interface(
263
  generateOutput,
@@ -267,7 +267,7 @@ demo = gr.Interface(
267
  gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
268
  ],
269
  outputs = [
270
- gr.Image(width=256, height=256, label="Output"),
271
  ],
272
  title = title,
273
  description = description,
 
81
  def generate_with_embs(text_input, text_embeddings, conditioning_image):
82
  height = 512 # default height of Stable Diffusion
83
  width = 512 # default width of Stable Diffusion
84
+ num_inference_steps = 10 # Number of denoising steps
85
  guidance_scale = 7.5 # Scale for classifier-free guidance
86
 
87
  generator = torch.manual_seed(torch.seed()) # Seed generator to create the inital latent noise
88
  batch_size = 1
89
  loss_scale = 100 #@param
90
+ imageCondSteps = 2
91
 
92
  max_length = text_input.input_ids.shape[-1]
93
  uncond_input = tokenizer(
 
140
  loss = latent_loss(latents_x0, conditioning_image) * loss_scale
141
  # loss = blue_loss(denoised_images) * blue_loss_scale
142
 
143
+ # print(i, 'loss item:', loss.item())
144
 
145
  # Get gradient
146
  cond_grad = torch.autograd.grad(loss, latents)[0]
 
169
  # print("text_input:", text_input)
170
  input_ids = text_input.input_ids.to(torch_device)
171
  # print("Input Ids:", input_ids)
172
+ # print("Input Ids shape:", input_ids.shape)
173
 
174
  token_emb_layer = text_encoder.text_model.embeddings.token_embedding
175
  # Get token embeddings
176
  token_embeddings = token_emb_layer(input_ids)
177
+ # print("Token Embeddings shape:", token_embeddings.shape)
178
 
179
  # The new embedding - our special style word
180
  replacement_token_embedding = style_embed[new_style].to(torch_device)
 
184
 
185
  pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
186
  position_ids = text_encoder.text_model.embeddings.position_ids
187
+ # print("position_ids shape:", position_ids.shape)
188
  position_embeddings = pos_emb_layer(position_ids)
189
+ # print("Position Embeddings shape:", token_embeddings.shape)
190
 
191
  # Combine with pos embs
192
  input_embeddings = token_embeddings + position_embeddings
 
257
 
258
  title = "Stable Diffusion SD Styles along with image conditioning"
259
  description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
260
+ examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'],["A playground", 'lineart', None],["A theme park", 'cute-game-style', 'conditioning_images/vividcolors.jpg'],["A mouse", 'depthmap', 'conditioning_images/autumn.jpg']]
261
  style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
262
  demo = gr.Interface(
263
  generateOutput,
 
267
  gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
268
  ],
269
  outputs = [
270
+ gr.Image(width=512, height=512, label="Output"),
271
  ],
272
  title = title,
273
  description = description,