Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -81,13 +81,13 @@ def latent_loss(latent, conditioning_image):
|
|
81 |
def generate_with_embs(text_input, text_embeddings, conditioning_image):
|
82 |
height = 512 # default height of Stable Diffusion
|
83 |
width = 512 # default width of Stable Diffusion
|
84 |
-
num_inference_steps =
|
85 |
guidance_scale = 7.5 # Scale for classifier-free guidance
|
86 |
|
87 |
generator = torch.manual_seed(torch.seed()) # Seed generator to create the initial latent noise
|
88 |
batch_size = 1
|
89 |
loss_scale = 100 #@param
|
90 |
-
imageCondSteps =
|
91 |
|
92 |
max_length = text_input.input_ids.shape[-1]
|
93 |
uncond_input = tokenizer(
|
@@ -140,7 +140,7 @@ def generate_with_embs(text_input, text_embeddings, conditioning_image):
|
|
140 |
loss = latent_loss(latents_x0, conditioning_image) * loss_scale
|
141 |
# loss = blue_loss(denoised_images) * blue_loss_scale
|
142 |
|
143 |
-
print(i, 'loss item:', loss.item())
|
144 |
|
145 |
# Get gradient
|
146 |
cond_grad = torch.autograd.grad(loss, latents)[0]
|
@@ -169,12 +169,12 @@ def getImageWithStyle(prompt, style_name, conditioning_image):
|
|
169 |
# print("text_input:", text_input)
|
170 |
input_ids = text_input.input_ids.to(torch_device)
|
171 |
# print("Input Ids:", input_ids)
|
172 |
-
print("Input Ids shape:", input_ids.shape)
|
173 |
|
174 |
token_emb_layer = text_encoder.text_model.embeddings.token_embedding
|
175 |
# Get token embeddings
|
176 |
token_embeddings = token_emb_layer(input_ids)
|
177 |
-
print("Token Embeddings shape:", token_embeddings.shape)
|
178 |
|
179 |
# The new embedding - our special style word
|
180 |
replacement_token_embedding = style_embed[new_style].to(torch_device)
|
@@ -184,9 +184,9 @@ def getImageWithStyle(prompt, style_name, conditioning_image):
|
|
184 |
|
185 |
pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
|
186 |
position_ids = text_encoder.text_model.embeddings.position_ids
|
187 |
-
print("position_ids shape:", position_ids.shape)
|
188 |
position_embeddings = pos_emb_layer(position_ids)
|
189 |
-
print("Position Embeddings shape:", token_embeddings.shape)
|
190 |
|
191 |
# Combine with pos embs
|
192 |
input_embeddings = token_embeddings + position_embeddings
|
@@ -257,7 +257,7 @@ def generateOutput(prompt, style_name, conditioning_image):
|
|
257 |
|
258 |
title = "Stable Diffusion SD Styles along with image conditioning"
|
259 |
description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
|
260 |
-
examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'],["A playground", 'lineart', None]]
|
261 |
style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
|
262 |
demo = gr.Interface(
|
263 |
generateOutput,
|
@@ -267,7 +267,7 @@ demo = gr.Interface(
|
|
267 |
gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
|
268 |
],
|
269 |
outputs = [
|
270 |
-
gr.Image(width=
|
271 |
],
|
272 |
title = title,
|
273 |
description = description,
|
|
|
81 |
def generate_with_embs(text_input, text_embeddings, conditioning_image):
|
82 |
height = 512 # default height of Stable Diffusion
|
83 |
width = 512 # default width of Stable Diffusion
|
84 |
+
num_inference_steps = 10 # Number of denoising steps
|
85 |
guidance_scale = 7.5 # Scale for classifier-free guidance
|
86 |
|
87 |
generator = torch.manual_seed(torch.seed()) # Seed generator to create the initial latent noise
|
88 |
batch_size = 1
|
89 |
loss_scale = 100 #@param
|
90 |
+
imageCondSteps = 2
|
91 |
|
92 |
max_length = text_input.input_ids.shape[-1]
|
93 |
uncond_input = tokenizer(
|
|
|
140 |
loss = latent_loss(latents_x0, conditioning_image) * loss_scale
|
141 |
# loss = blue_loss(denoised_images) * blue_loss_scale
|
142 |
|
143 |
+
# print(i, 'loss item:', loss.item())
|
144 |
|
145 |
# Get gradient
|
146 |
cond_grad = torch.autograd.grad(loss, latents)[0]
|
|
|
169 |
# print("text_input:", text_input)
|
170 |
input_ids = text_input.input_ids.to(torch_device)
|
171 |
# print("Input Ids:", input_ids)
|
172 |
+
# print("Input Ids shape:", input_ids.shape)
|
173 |
|
174 |
token_emb_layer = text_encoder.text_model.embeddings.token_embedding
|
175 |
# Get token embeddings
|
176 |
token_embeddings = token_emb_layer(input_ids)
|
177 |
+
# print("Token Embeddings shape:", token_embeddings.shape)
|
178 |
|
179 |
# The new embedding - our special style word
|
180 |
replacement_token_embedding = style_embed[new_style].to(torch_device)
|
|
|
184 |
|
185 |
pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
|
186 |
position_ids = text_encoder.text_model.embeddings.position_ids
|
187 |
+
# print("position_ids shape:", position_ids.shape)
|
188 |
position_embeddings = pos_emb_layer(position_ids)
|
189 |
+
# print("Position Embeddings shape:", token_embeddings.shape)
|
190 |
|
191 |
# Combine with pos embs
|
192 |
input_embeddings = token_embeddings + position_embeddings
|
|
|
257 |
|
258 |
title = "Stable Diffusion SD Styles along with image conditioning"
|
259 |
description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
|
260 |
+
examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'],["A playground", 'lineart', None],["A theme park", 'cute-game-style', 'conditioning_images/vividcolors.jpg'],["A mouse", 'depthmap', 'conditioning_images/autumn.jpg']]
|
261 |
style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
|
262 |
demo = gr.Interface(
|
263 |
generateOutput,
|
|
|
267 |
gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
|
268 |
],
|
269 |
outputs = [
|
270 |
+
gr.Image(width=512, height=512, label="Output"),
|
271 |
],
|
272 |
title = title,
|
273 |
description = description,
|