Chintan-Shah committed (verified)
Commit: cd95943 · Parent(s): 1285101

Update app.py

Files changed (1): app.py (+36 −40)
app.py CHANGED
@@ -70,20 +70,21 @@ def get_output_embeds(input_embeddings):
 
 def latent_loss(latent, conditioning_image):
     # How far are the image embeds from lossembeds:
-    image = Image.open(conditioning_image)
-    r_image = image.resize((512,512))
+    # image = Image.open(conditioning_image)
+    r_image = conditioning_image.resize((512,512))
     r_latent = pil_to_latent(r_image)
     error = F.mse_loss(0.5*latent, 0.5*r_latent)
     return error
 # Generating an image with these modified embeddings
 
 
-def generate_with_embs(text_input, text_embeddings, conditioning_image, seed):
+def generate_with_embs(text_input, text_embeddings, conditioning_image):
     height = 512              # default height of Stable Diffusion
     width = 512               # default width of Stable Diffusion
     num_inference_steps = 30  # Number of denoising steps
     guidance_scale = 7.5      # Scale for classifier-free guidance
-    generator = torch.manual_seed(seed)  # Seed generator to create the initial latent noise
+
+    generator = torch.manual_seed(torch.seed())  # Seed generator to create the initial latent noise
     batch_size = 1
     loss_scale = 100  #@param
     imageCondSteps = 5
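This hunk changes `latent_loss` to accept an already-open PIL image (so the `Image.open` call moves into a comment) and makes `generate_with_embs` seed itself from `torch.seed()` instead of a caller-supplied value. The loss itself is consumed inside the denoising loop further down the file, which this hunk does not show; the following is a minimal sketch of how such a latent-space loss is typically folded into a sampling step, where the `sigma` name (the scheduler's current noise level) and the `loss_scale` default are assumptions taken from the settings above:

```python
import torch
import torch.nn.functional as F

def apply_latent_guidance(latents, r_latent, sigma, loss_scale=100):
    # Treat the current latents as a leaf tensor so a gradient can be taken
    latents = latents.detach().requires_grad_()
    # Same objective as latent_loss: MSE between scaled current and conditioning latents
    loss = F.mse_loss(0.5 * latents, 0.5 * r_latent) * loss_scale
    # Nudge the latents downhill along the loss gradient
    grad = torch.autograd.grad(loss, latents)[0]
    # Scaling by sigma**2 keeps the correction proportional to the noise level,
    # a common choice in loss-guided Stable Diffusion loops
    return latents.detach() - grad * sigma ** 2
```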
@@ -154,24 +155,14 @@ def generate_with_embs(text_input, text_embeddings, conditioning_image, seed):
     return latents_to_pil(latents)[0]
 
 
-def getImageWithStyle(prompt, style_path, conditioning_image, seed):
+def getImageWithStyle(prompt, style_name, conditioning_image):
     prompt = prompt + ' in the style of puppy'
 
-    style_loc = style_path + '/learned_embeds.bin'
+    style_loc = "styles/" + style_name + '/learned_embeds.bin'
     style_embed = torch.load(style_loc)
     # print(style_embed)
     # print(style_embed.keys(), style_embed[style_embed.keys()[0]].shape)
     new_style = list(style_embed.keys())[0]
-    print("New style:", new_style)
-
-    filename_style = new_style.replace("<", "")
-    filename_style = filename_style.replace(">", "")
-    filename = prompt.replace("puppy", filename_style)
-    if (conditioning_image):
-        imagename = Path(conditioning_image).stem
-        print("Conditioned imagename:", imagename)
-        filename = filename + "_" + imagename + "_conditioned"
-    print("filename:", filename)
 
     # Tokenize
     text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
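Here `getImageWithStyle` switches from a filesystem path to a style name resolved under `styles/`, and the output-filename bookkeeping disappears because the image is now returned rather than saved (see the next hunk). For context, a sketch of what a textual-inversion `learned_embeds.bin` holds and how its vector is typically swapped in for the `puppy` placeholder; `tokenizer` and `text_encoder` are the globals app.py already sets up, and the actual replacement step lives between this hunk and the next:

```python
import torch

# The file maps one learned pseudo-token to one embedding vector,
# e.g. {'<birb-style>': tensor of shape [768]}
style_embed = torch.load("styles/birb/learned_embeds.bin")
new_style = list(style_embed.keys())[0]
style_vector = style_embed[new_style]

prompt = 'A farm in the style of puppy'
puppy_id = tokenizer.encode("puppy", add_special_tokens=False)[0]
token_emb_layer = text_encoder.text_model.embeddings.token_embedding

with torch.no_grad():
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    input_embeddings = token_emb_layer(input_ids)
    # Overwrite the placeholder token's rows with the learned style vector
    input_embeddings[0, input_ids[0] == puppy_id] = style_vector
```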
@@ -204,10 +195,11 @@ def getImageWithStyle(prompt, style_path, conditioning_image, seed):
     modified_output_embeddings = get_output_embeds(input_embeddings)
 
     # And generate an image with this:
-    image = generate_with_embs(text_input, modified_output_embeddings, conditioning_image, seed)
-    name = "./Outputs/" + filename + ".jpg"
+    image = generate_with_embs(text_input, modified_output_embeddings, conditioning_image)
+    return image
+    # name = "./Outputs/" + filename + ".jpg"
 
-    image.save(name)
+    # image.save(name)
 
 
 # Suppress some unnecessary warnings when loading the CLIPTextModel
@@ -240,35 +232,39 @@ image_encoder = image_encoder.to(torch_device)
 
 # Used puppy as a placeholder here since the token is known
 # Will replace with some other word that is better
-prompt = 'A farm'
-style_name = 'birb'
-conditioning_image_folder = './conditioning_images/'
-style_folder = './styles/'
-
-seedlist = [*range(0, 10000, 500)]
-print(seedlist)
-
-stylelist = ['birb', ]
-
-i = 0
-for style_path in glob.glob(os.path.join(style_folder, '*')):
-    seed = seedlist[i]
-    i = i + 1
-    getImageWithStyle(prompt, style_path, None, seed)
-    for conditioning_image in glob.glob(os.path.join(conditioning_image_folder, '*.jpg')):
-        print("style_path:", style_path, "conditioning_image:", conditioning_image)
-        getImageWithStyle(prompt, style_path, conditioning_image, seed)
-
+# prompt = 'A farm'
+# style_name = 'birb'
+# conditioning_image_folder = './conditioning_images/'
+# style_folder = './styles/'
+
+# seedlist = [*range(0, 10000, 500)]
+# print(seedlist)
+
+# stylelist = ['birb', ]
+
+# i = 0
+# for style_path in glob.glob(os.path.join(style_folder, '*')):
+#     seed = seedlist[i]
+#     i = i + 1
+#     getImageWithStyle(prompt, style_path, None, seed)
+#     for conditioning_image in glob.glob(os.path.join(conditioning_image_folder, '*.jpg')):
+#         print("style_path:", style_path, "conditioning_image:", conditioning_image)
+#         getImageWithStyle(prompt, style_path, conditioning_image, seed)
+
+def generateOutput(prompt, style_name, conditioning_image):
+    outputImage = getImageWithStyle(prompt, style_name, conditioning_image)
+    return outputImage
+
 title = "Stable Diffusion SD Styles along with image conditioning"
 description = "Shows the Stable Diffusion usage with SD Styles as well as ways to condition using different loss aspects"
-examples = [["A farm", 'midjourney', '']]
+examples = [["A farm", 'midjourney', 'conditioning_images/indianflag.jpg'], ["A playground", 'lineart', None]]
 style_options = ['birb', 'moebius', 'midjourney', 'cute-game-style', 'depthmap', 'hitokomoru', 'lineart', 'madhubani']
 demo = gr.Interface(
     generateOutput,
     inputs = [
         gr.Textbox(),
         gr.Dropdown(choices=style_options, label="Choose the Style you want"),
-        gr.Image(width=256, height=256, label="Image to use for Conditioning"),
+        gr.Image(width=256, height=256, label="Image to use for Conditioning", type='pil'),
     ],
     outputs = [
         gr.Image(width=256, height=256, label="Output"),
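The batch experiment loop is commented out in favor of the `generateOutput` wrapper that Gradio invokes directly, and the conditioning input is declared with `type='pil'`, so the handler receives a `PIL.Image.Image` (or `None` when the field is left empty) rather than a file path; that is what lets the updated `latent_loss` call `conditioning_image.resize((512,512))` directly. Below is a defensive variant of the wrapper around the file's `getImageWithStyle`; the RGB normalization is an assumption, not part of the commit:

```python
def generateOutput(prompt, style_name, conditioning_image):
    # With type='pil', Gradio passes a PIL.Image.Image, or None when the
    # image input is left empty; converting to RGB guards against RGBA
    # uploads before the image reaches pil_to_latent.
    if conditioning_image is not None:
        conditioning_image = conditioning_image.convert("RGB")
    return getImageWithStyle(prompt, style_name, conditioning_image)
```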
 