Spaces:
Build error
Build error
Nupur Kumari
committed on
Commit
·
7b6145e
1
Parent(s):
c4a18be
update
Browse files
- app.py +41 -17
- inference.py +2 -0
app.py
CHANGED
|
@@ -22,6 +22,15 @@ DESCRIPTION = '''This is a demo for [https://github.com/adobe-research/custom-di
|
|
| 22 |
It is recommended to upgrade to GPU in Settings after duplicating this space to use it.
|
| 23 |
<a href="https://huggingface.co/spaces/nupurkmr9/custom-diffusion?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
| 24 |
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
ORIGINAL_SPACE_ID = 'nupurkmr9/custom-diffusion'
|
| 27 |
SPACE_ID = os.getenv('SPACE_ID', ORIGINAL_SPACE_ID)
|
|
@@ -74,38 +83,44 @@ def create_training_demo(trainer: Trainer,
|
|
| 74 |
with gr.Box():
|
| 75 |
gr.Markdown('Training Data')
|
| 76 |
concept_images = gr.Files(label='Images for your concept')
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
class_prompt = gr.Textbox(label='Regularization set Prompt',
|
| 80 |
max_lines=1, placeholder='Example: "cat"')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
gr.Markdown('''
|
| 82 |
-
-
|
| 83 |
-
-
|
| 84 |
-
- For a
|
|
|
|
|
|
|
| 85 |
''')
|
| 86 |
with gr.Box():
|
| 87 |
gr.Markdown('Training Parameters')
|
| 88 |
num_training_steps = gr.Number(
|
| 89 |
label='Number of Training Steps', value=1000, precision=0)
|
| 90 |
learning_rate = gr.Number(label='Learning Rate', value=0.00001)
|
| 91 |
-
train_text_encoder = gr.Checkbox(label='Train Text Encoder',
|
| 92 |
-
value=False)
|
| 93 |
-
modifier_token = gr.Checkbox(label='modifier token',
|
| 94 |
-
value=True)
|
| 95 |
batch_size = gr.Number(
|
| 96 |
label='batch_size', value=1, precision=0)
|
| 97 |
-
gradient_accumulation = gr.Number(
|
| 98 |
-
label='Number of Gradient Accumulation',
|
| 99 |
-
value=1,
|
| 100 |
-
precision=0)
|
| 101 |
with gr.Row():
|
| 102 |
use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
|
| 103 |
gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
gr.Markdown('''
|
| 105 |
- It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
|
| 106 |
-
- Our results in the paper are with
|
| 107 |
- Enable gradient checkpointing for lower memory requirements (~14GB) at the expense of slower backward pass.
|
| 108 |
-
- If "Train Text Encoder", disable "modifier token".
|
| 109 |
- Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
|
| 110 |
''')
|
| 111 |
|
|
@@ -136,7 +151,8 @@ def create_training_demo(trainer: Trainer,
|
|
| 136 |
gradient_accumulation,
|
| 137 |
batch_size,
|
| 138 |
use_8bit_adam,
|
| 139 |
-
gradient_checkpointing
|
|
|
|
| 140 |
],
|
| 141 |
outputs=[
|
| 142 |
training_status,
|
|
@@ -174,6 +190,10 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
|
|
| 174 |
value='CompVis/stable-diffusion-v1-4',
|
| 175 |
label='Base Model',
|
| 176 |
visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
reload_button = gr.Button('Reload Weight List')
|
| 178 |
weight_name = gr.Dropdown(choices=find_weight_files(),
|
| 179 |
value='custom-diffusion-models/cat.bin',
|
|
@@ -214,6 +234,7 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
|
|
| 214 |
gr.Markdown('''
|
| 215 |
- Models with names starting with "custom-diffusion-models/" are the pretrained models provided in the [original repo](https://github.com/adobe-research/custom-diffusion), and the ones with names starting with "results/delta.bin" are your trained models.
|
| 216 |
- After training, you can press "Reload Weight List" button to load your trained model names.
|
|
|
|
| 217 |
''')
|
| 218 |
with gr.Column():
|
| 219 |
result = gr.Image(label='Result')
|
|
@@ -231,6 +252,7 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
|
|
| 231 |
guidance_scale,
|
| 232 |
eta,
|
| 233 |
batch_size,
|
|
|
|
| 234 |
],
|
| 235 |
outputs=result,
|
| 236 |
queue=False)
|
|
@@ -244,6 +266,7 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
|
|
| 244 |
guidance_scale,
|
| 245 |
eta,
|
| 246 |
batch_size,
|
|
|
|
| 247 |
],
|
| 248 |
outputs=result,
|
| 249 |
queue=False)
|
|
@@ -282,6 +305,7 @@ with gr.Blocks(css='style.css') as demo:
|
|
| 282 |
|
| 283 |
gr.Markdown(TITLE)
|
| 284 |
gr.Markdown(DESCRIPTION)
|
|
|
|
| 285 |
|
| 286 |
with gr.Tabs():
|
| 287 |
with gr.TabItem('Train'):
|
|
|
|
| 22 |
It is recommended to upgrade to GPU in Settings after duplicating this space to use it.
|
| 23 |
<a href="https://huggingface.co/spaces/nupurkmr9/custom-diffusion?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
| 24 |
'''
|
| 25 |
+
DETAILDESCRIPTION='''
|
| 26 |
+
Custom Diffusion allows you to fine-tune text-to-image diffusion models, such as Stable Diffusion, given a few images of a new concept (~4-20).
|
| 27 |
+
We fine-tune only a subset of model parameters, namely key and value projection matrices, in the cross-attention layers and the modifier token used to represent the object.
|
| 28 |
+
This also reduces the extra storage for each additional concept to 75MB.
|
| 29 |
+
Our method further allows you to use a combination of concepts. Demo for multiple concepts will be added soon.
|
| 30 |
+
<center>
|
| 31 |
+
<img src="https://huggingface.co/spaces/nupurkmr9/custom-diffusion/resolve/main/method.jpg" width="600" align="center" >
|
| 32 |
+
</center>
|
| 33 |
+
'''
|
| 34 |
|
| 35 |
ORIGINAL_SPACE_ID = 'nupurkmr9/custom-diffusion'
|
| 36 |
SPACE_ID = os.getenv('SPACE_ID', ORIGINAL_SPACE_ID)
|
|
|
|
| 83 |
with gr.Box():
|
| 84 |
gr.Markdown('Training Data')
|
| 85 |
concept_images = gr.Files(label='Images for your concept')
|
| 86 |
+
with gr.Row():
|
| 87 |
+
class_prompt = gr.Textbox(label='Class Prompt',
|
|
|
|
| 88 |
max_lines=1, placeholder='Example: "cat"')
|
| 89 |
+
with gr.Column():
|
| 90 |
+
modifier_token = gr.Checkbox(label='modifier token',
|
| 91 |
+
value=True)
|
| 92 |
+
train_text_encoder = gr.Checkbox(label='Train Text Encoder',
|
| 93 |
+
value=False)
|
| 94 |
+
concept_prompt = gr.Textbox(label='Concept Prompt',
|
| 95 |
+
max_lines=1, placeholder='Example: "photo of a \<new1\> cat"')
|
| 96 |
gr.Markdown('''
|
| 97 |
+
- We use "\<new1\>" modifier token in front of the concept, e.g., "\<new1\> cat". By default modifier_token is enabled.
|
| 98 |
+
- If "Train Text Encoder", disable "modifier token" and use any unique text to describe the concept e.g. "ktn cat".
|
| 99 |
+
- For a new concept an e.g. concept prompt is "photo of a \<new1\> cat" and "cat" for class prompt.
|
| 100 |
+
- For a style concept, use "painting in the style of \<new1\> art" for concept prompt and "art" for class prompt.
|
| 101 |
+
- Class prompt should be the object category.
|
| 102 |
''')
|
| 103 |
with gr.Box():
|
| 104 |
gr.Markdown('Training Parameters')
|
| 105 |
num_training_steps = gr.Number(
|
| 106 |
label='Number of Training Steps', value=1000, precision=0)
|
| 107 |
learning_rate = gr.Number(label='Learning Rate', value=0.00001)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
batch_size = gr.Number(
|
| 109 |
label='batch_size', value=1, precision=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
with gr.Row():
|
| 111 |
use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
|
| 112 |
gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
|
| 113 |
+
with gr.Accordion('Other Parameters', open=False):
|
| 114 |
+
gradient_accumulation = gr.Number(
|
| 115 |
+
label='Number of Gradient Accumulation',
|
| 116 |
+
value=1,
|
| 117 |
+
precision=0)
|
| 118 |
+
gen_images = gr.Checkbox(label='Generated images as regularization',
|
| 119 |
+
value=False)
|
| 120 |
gr.Markdown('''
|
| 121 |
- It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
|
| 122 |
+
- Our results in the paper are trained with batch-size 4 (8 including class regularization samples).
|
| 123 |
- Enable gradient checkpointing for lower memory requirements (~14GB) at the expense of slower backward pass.
|
|
|
|
| 124 |
- Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
|
| 125 |
''')
|
| 126 |
|
|
|
|
| 151 |
gradient_accumulation,
|
| 152 |
batch_size,
|
| 153 |
use_8bit_adam,
|
| 154 |
+
gradient_checkpointing,
|
| 155 |
+
gen_images
|
| 156 |
],
|
| 157 |
outputs=[
|
| 158 |
training_status,
|
|
|
|
| 190 |
value='CompVis/stable-diffusion-v1-4',
|
| 191 |
label='Base Model',
|
| 192 |
visible=True)
|
| 193 |
+
resolution = gr.Dropdown(choices=[512, 768],
|
| 194 |
+
value=512,
|
| 195 |
+
label='Resolution',
|
| 196 |
+
visible=True)
|
| 197 |
reload_button = gr.Button('Reload Weight List')
|
| 198 |
weight_name = gr.Dropdown(choices=find_weight_files(),
|
| 199 |
value='custom-diffusion-models/cat.bin',
|
|
|
|
| 234 |
gr.Markdown('''
|
| 235 |
- Models with names starting with "custom-diffusion-models/" are the pretrained models provided in the [original repo](https://github.com/adobe-research/custom-diffusion), and the ones with names starting with "results/delta.bin" are your trained models.
|
| 236 |
- After training, you can press "Reload Weight List" button to load your trained model names.
|
| 237 |
+
- Change default batch-size and steps for faster sampling.
|
| 238 |
''')
|
| 239 |
with gr.Column():
|
| 240 |
result = gr.Image(label='Result')
|
|
|
|
| 252 |
guidance_scale,
|
| 253 |
eta,
|
| 254 |
batch_size,
|
| 255 |
+
resolution
|
| 256 |
],
|
| 257 |
outputs=result,
|
| 258 |
queue=False)
|
|
|
|
| 266 |
guidance_scale,
|
| 267 |
eta,
|
| 268 |
batch_size,
|
| 269 |
+
resolution
|
| 270 |
],
|
| 271 |
outputs=result,
|
| 272 |
queue=False)
|
|
|
|
| 305 |
|
| 306 |
gr.Markdown(TITLE)
|
| 307 |
gr.Markdown(DESCRIPTION)
|
| 308 |
+
gr.Markdown(DETAILDESCRIPTION)
|
| 309 |
|
| 310 |
with gr.Tabs():
|
| 311 |
with gr.TabItem('Train'):
|
inference.py
CHANGED
|
@@ -61,6 +61,7 @@ class InferencePipeline:
|
|
| 61 |
guidance_scale: float,
|
| 62 |
eta: float,
|
| 63 |
batch_size: int,
|
|
|
|
| 64 |
) -> PIL.Image.Image:
|
| 65 |
if not torch.cuda.is_available():
|
| 66 |
raise gr.Error('CUDA is not available.')
|
|
@@ -71,6 +72,7 @@ class InferencePipeline:
|
|
| 71 |
out = self.pipe([prompt]*batch_size,
|
| 72 |
num_inference_steps=n_steps,
|
| 73 |
guidance_scale=guidance_scale,
|
|
|
|
| 74 |
eta = eta,
|
| 75 |
generator=generator) # type: ignore
|
| 76 |
out = out.images
|
|
|
|
| 61 |
guidance_scale: float,
|
| 62 |
eta: float,
|
| 63 |
batch_size: int,
|
| 64 |
+
resolution: int,
|
| 65 |
) -> PIL.Image.Image:
|
| 66 |
if not torch.cuda.is_available():
|
| 67 |
raise gr.Error('CUDA is not available.')
|
|
|
|
| 72 |
out = self.pipe([prompt]*batch_size,
|
| 73 |
num_inference_steps=n_steps,
|
| 74 |
guidance_scale=guidance_scale,
|
| 75 |
+
height=resolution, width=resolution,
|
| 76 |
eta = eta,
|
| 77 |
generator=generator) # type: ignore
|
| 78 |
out = out.images
|