Chintan-Shah committed (verified)
Commit: cd95943 · Parent(s): 1285101

Update app.py

Files changed (1): app.py (+36 −40)
app.py CHANGED
@@ -70,20 +70,21 @@ def get_output_embeds(input_embeddings):
 
 def latent_loss(latent, conditioning_image):
     # How far are the image embeds from lossembeds:
-    image = Image.open(conditioning_image)
-    r_image = image.resize((512,512))
+    # image = Image.open(conditioning_image)
+    r_image = conditioning_image.resize((512,512))
     r_latent = pil_to_latent(r_image)
     error = F.mse_loss(0.5*latent, 0.5*r_latent)
     return error
 # Generating an image with these modified embeddings
 
 
-def generate_with_embs(text_input, text_embeddings, conditioning_image, seed):
+def generate_with_embs(text_input, text_embeddings, conditioning_image):
     height = 512              # default height of Stable Diffusion
     width = 512               # default width of Stable Diffusion
     num_inference_steps = 30  # Number of denoising steps
     guidance_scale = 7.5      # Scale for classifier-free guidance
-    generator = torch.manual_seed(seed)  # Seed generator to create the initial latent noise
+
+    generator = torch.manual_seed(torch.seed())  # Seed generator to create the initial latent noise
     batch_size = 1
     loss_scale = 100  #@param
     imageCondSteps = 5
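This hunk changes `latent_loss` to accept an already-open PIL image (so the `Image.open` call moves into a comment) and makes `generate_with_embs` seed itself from `torch.seed()` instead of a caller-supplied value. The loss itself is consumed inside the denoising loop further down the file, which this hunk does not show; the following is a minimal sketch of how such a latent-space loss is typically folded into a sampling step, where the `sigma` name (the scheduler's current noise level) and the `loss_scale` default are assumptions taken from the settings above:

```python
import torch
import torch.nn.functional as F

def apply_latent_guidance(latents, r_latent, sigma, loss_scale=100):
    # Treat the current latents as a leaf tensor so a gradient can be taken
    latents = latents.detach().requires_grad_()
    # Same objective as latent_loss: MSE between scaled current and conditioning latents
    loss = F.mse_loss(0.5 * latents, 0.5 * r_latent) * loss_scale
    # Nudge the latents downhill along the loss gradient
    grad = torch.autograd.grad(loss, latents)[0]
    # Scaling by sigma**2 keeps the correction proportional to the noise level,
    # a common choice in loss-guided Stable Diffusion loops
    return latents.detach() - grad * sigma ** 2
```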
@@ -154,24 +155,14 @@ def generate_with_embs(text_input, text_embeddings, conditioning_image, seed):
     return latents_to_pil(latents)[0]
 
 
-def getImageWithStyle(prompt, style_path, conditioning_image, seed):
+def getImageWithStyle(prompt, style_name, conditioning_image):
     prompt = prompt + ' in the style of puppy'
 
-    style_loc = style_path + '/learned_embeds.bin'
+    style_loc = "styles/" + style_name + '/learned_embeds.bin'
     style_embed = torch.load(style_loc)
     # print(style_embed)
     # print(style_embed.keys(), style_embed[style_embed.keys()[0]].shape)
     new_style = list(style_embed.keys())[0]
-    print("New style:", new_style)
-
-    filename_style = new_style.replace("<", "")
-    filename_style = filename_style.replace(">", "")
-    filename = prompt.replace("puppy", filename_style)
-    if (conditioning_image):
-        imagename = Path(conditioning_image).stem
-        print("Conditioned imagename:", imagename)
-        filename = filename + "_" + imagename + "_conditioned"
-    print("filename:", filename)
 
     # Tokenize
     text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
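Here `getImageWithStyle` switches from a filesystem path to a style name resolved under `styles/`, and the output-filename bookkeeping disappears because the image is now returned rather than saved (see the next hunk). For context, a sketch of what a textual-inversion `learned_embeds.bin` holds and how its vector is typically swapped in for the `puppy` placeholder; `tokenizer` and `text_encoder` are the globals app.py already sets up, and the actual replacement step lives between this hunk and the next:

```python
import torch

# The file maps one learned pseudo-token to one embedding vector,
# e.g. {'<birb-style>': tensor of shape [768]}
style_embed = torch.load("styles/birb/learned_embeds.bin")
new_style = list(style_embed.keys())[0]
style_vector = style_embed[new_style]

prompt = 'A farm in the style of puppy'
puppy_id = tokenizer.encode("puppy", add_special_tokens=False)[0]
token_emb_layer = text_encoder.text_model.embeddings.token_embedding

with torch.no_grad():
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    input_embeddings = token_emb_layer(input_ids)
    # Overwrite the placeholder token's rows with the learned style vector
    input_embeddings[0, input_ids[0] == puppy_id] = style_vector
```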
@@ -204,10 +195,11 @@ def getImageWithStyle(prompt, style_path, conditioning_image, seed):
     modified_output_embeddings = get_output_embeds(input_embeddings)
 
     # And generate an image with this:
-    image = generate_with_embs(text_input, modified_output_embeddings, conditioning_image, seed)
-    name = "./Outputs/" + filename + ".jpg"
+    image = generate_with_embs(text_input, modified_output_embeddings, conditioning_image)
+    return image
+    # name = "./Outputs/" + filename + ".jpg"
 
-    image.save(name)
+    # image.save(name)
 
 
 # Suppress some unnecessary warnings when loading the CLIPTextModel
@@ -240,35 +232,39 @@ image_encoder = image_encoder.to(torch_device)
 
 # Used puppy as a placeholder here since the token is known
 # Will replace with some other word that is better
-prompt = 'A farm'
-style_name = 'birb'
-conditioning_image_folder = './conditioning_images/'
-style_folder = './styles/'
-
-seedlist = [*range(0, 10000, 500)]
-print(seedlist)
-
-stylelist = ['birb', ]
-
-i = 0
-for style_path in glob.glob(os.path.join(style_folder, '*')):
-    seed = seedlist[i]
-    i = i + 1
-    getImageWithStyle(prompt, style_path, None, seed)
-    for conditioning_image in glob.glob(os.path.join(conditioning_image_folder, '*.jpg')):
-        print("style_path:", style_path, "conditioning_image:", conditioning_image)
-        getImageWithStyle(prompt, style_path, conditioning_image, seed)
-
+# prompt = 'A farm'
+# style_name = 'birb'
+# conditioning_image_folder = './conditioning_images/'
+# style_folder = './styles/'
+
+# seedlist = [*range(0, 10000, 500)]
+# print(seedlist)
+
+# stylelist = ['birb', ]
+
+# i = 0
+# for style_path in glob.glob(os.path.join(style_folder, '*')):
+#     seed = seedlist[i]
+#     i = i + 1
+#     getImageWithStyle(prompt, style_path, None, seed)
+#     for conditioning_image in glob.glob(os.path.join(conditioning_image_folder, '*.jpg')):
+#         print("style_path:", style_path, "conditioning_image:", conditioning_image)
+#         getImageWithStyle(prompt, style_path, conditioning_image, seed)
+
+def generateOutput(prompt, style_name, conditioning_image):
+    outputImage = getImageWithStyle(prompt, style_name, conditioning_image)
+    return outputImage
+
 title = "Stable Diffusion SD Styles along with image conditioning"
 description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
-examples = [["A farm", 'midjourney', '']]
+examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'], ["A playground", 'lineart', None]]
 style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
 demo = gr.Interface(
     generateOutput,
     inputs = [
         gr.Textbox(),
         gr.Dropdown(choices=style_options, label="Choose the Style you want"),
-        gr.Image(width=256, height=256, label="Image to use for Conditioning"),
+        gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
     ],
     outputs = [
         gr.Image(width=256, height=256, label="Output"),
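The batch experiment loop is commented out in favor of the `generateOutput` wrapper that Gradio invokes directly, and the conditioning input is declared with `type='pil'`, so the handler receives a `PIL.Image.Image` (or `None` when the field is left empty) rather than a file path; that is what lets the updated `latent_loss` call `conditioning_image.resize((512,512))` directly. Below is a defensive variant of the wrapper around the file's `getImageWithStyle`; the RGB normalization is an assumption, not part of the commit:

```python
def generateOutput(prompt, style_name, conditioning_image):
    # With type='pil', Gradio passes a PIL.Image.Image, or None when the
    # image input is left empty; converting to RGB guards against RGBA
    # uploads before the image reaches pil_to_latent.
    if conditioning_image is not None:
        conditioning_image = conditioning_image.convert("RGB")
    return getImageWithStyle(prompt, style_name, conditioning_image)
```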
 