Commit 96eb4660
Parent(s):
Duplicate from google/ddpm-cifar10-32
Files changed:
- .gitattributes +28 -0
- README.md +59 -0
- config.json +36 -0
- diffusion_pytorch_model.bin +3 -0
- images/generated_image_0.png +0 -0
- images/generated_image_1.png +0 -0
- images/generated_image_2.png +0 -0
- images/generated_image_3.png +0 -0
- model_index.json +12 -0
- modeling_ddpm.py +60 -0
- scheduler_config.json +11 -0
.gitattributes
ADDED
@@ -0,0 +1,28 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
diffusion_model.pt filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,59 @@
---
license: apache-2.0
tags:
- pytorch
- diffusers
- unconditional-image-generation
duplicated_from: google/ddpm-cifar10-32
---

# Denoising Diffusion Probabilistic Models (DDPM)

**Paper**: [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239)

**Authors**: Jonathan Ho, Ajay Jain, Pieter Abbeel

**Abstract**:

*We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional CIFAR10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art FID score of 3.17. On 256x256 LSUN, we obtain sample quality similar to ProgressiveGAN.*

## Inference

**DDPM** models can use *discrete noise schedulers* for inference, such as:

- [scheduling_ddpm](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py)
- [scheduling_ddim](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py)
- [scheduling_pndm](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py)

Note that while the *ddpm* scheduler yields the highest quality, it also takes the longest.
For a good trade-off between quality and inference speed, you might want to consider the *ddim* or *pndm* schedulers instead.

See the following code:

```python
# !pip install diffusers
from diffusers import DDPMPipeline, DDIMPipeline, PNDMPipeline

model_id = "google/ddpm-cifar10-32"

# load model and scheduler
ddpm = DDPMPipeline.from_pretrained(model_id)  # you can replace DDPMPipeline with DDIMPipeline or PNDMPipeline for faster inference

# run pipeline in inference (sample random noise and denoise)
image = ddpm().images[0]

# save image
image.save("ddpm_generated_image.png")
```
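
To make the speed/quality trade-off above concrete, here is a minimal sketch of the faster path with the *ddim* scheduler; it assumes the `DDIMPipeline` call signature of current `diffusers` releases, and the choice of 50 steps is illustrative rather than part of this model card:

```python
# !pip install diffusers
from diffusers import DDIMPipeline

model_id = "google/ddpm-cifar10-32"

# same weights, but wrapped in a DDIM sampler
ddim = DDIMPipeline.from_pretrained(model_id)

# 50 denoising steps instead of the 1000 training timesteps the ddpm scheduler walks through
image = ddim(num_inference_steps=50).images[0]

image.save("ddim_generated_image.png")
```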

For more detailed information, please have a look at the [official inference example](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/diffusers_intro.ipynb).

## Training

If you want to train your own model, please have a look at the [official training example](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/training_example.ipynb).

## Samples

1. ![generated_image_0](images/generated_image_0.png)
2. ![generated_image_1](images/generated_image_1.png)
3. ![generated_image_2](images/generated_image_2.png)
4. ![generated_image_3](images/generated_image_3.png)
config.json
ADDED
@@ -0,0 +1,36 @@
{
  "_class_name": "UNet2DModel",
  "_diffusers_version": "0.0.4",
  "act_fn": "silu",
  "attention_head_dim": null,
  "block_out_channels": [
    128,
    256,
    256,
    256
  ],
  "center_input_sample": false,
  "down_block_types": [
    "DownBlock2D",
    "AttnDownBlock2D",
    "DownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 0,
  "flip_sin_to_cos": false,
  "freq_shift": 1,
  "in_channels": 3,
  "layers_per_block": 2,
  "mid_block_scale_factor": 1,
  "norm_eps": 1e-06,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 32,
  "time_embedding_type": "positional",
  "up_block_types": [
    "UpBlock2D",
    "UpBlock2D",
    "AttnUpBlock2D",
    "UpBlock2D"
  ]
}
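
This config describes the denoising UNet itself (a 3-channel, 32x32 model with attention in one down and one up block). As a quick sanity check it can be instantiated directly; a minimal sketch, assuming the `UNet2DModel.from_pretrained` API of current `diffusers` releases:

```python
import torch
from diffusers import UNet2DModel

# loads config.json plus the weights in diffusion_pytorch_model.bin
unet = UNet2DModel.from_pretrained("google/ddpm-cifar10-32")

# sample_size=32, in_channels=3: the model denoises CIFAR-10-sized images
noisy = torch.randn(1, unet.config.in_channels, unet.config.sample_size, unet.config.sample_size)
with torch.no_grad():
    residual = unet(noisy, timestep=10).sample  # predicted noise, same shape as the input
print(residual.shape)  # torch.Size([1, 3, 32, 32])
```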
diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ac3416a548879738893e935b42cab9119f51745b62bec4b6d8375e4d86e98ea4
size 143101489
images/generated_image_0.png
ADDED
images/generated_image_1.png
ADDED
images/generated_image_2.png
ADDED
images/generated_image_3.png
ADDED
model_index.json
ADDED
@@ -0,0 +1,12 @@
{
  "_class_name": "DDPMPipeline",
  "_module": "modeling_ddpm.py",
  "scheduler": [
    "diffusers",
    "DDPMScheduler"
  ],
  "unet": [
    "diffusers",
    "UNet2DModel"
  ]
}
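
model_index.json is what tells `DiffusionPipeline` how to assemble the pipeline: each component entry maps a name to the library and class used to load it. A minimal sketch of the generic loading path, assuming the behavior of current `diffusers` releases:

```python
from diffusers import DiffusionPipeline

# reads model_index.json, then loads "unet" as diffusers.UNet2DModel
# and "scheduler" as diffusers.DDPMScheduler from the same repo
pipeline = DiffusionPipeline.from_pretrained("google/ddpm-cifar10-32")
print(pipeline.components.keys())  # expected: dict_keys(['unet', 'scheduler'])
```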
modeling_ddpm.py
ADDED
@@ -0,0 +1,60 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from diffusers import DiffusionPipeline
import tqdm
import torch


class DDPM(DiffusionPipeline):

    modeling_file = "modeling_ddpm.py"

    def __init__(self, unet, noise_scheduler):
        super().__init__()
        self.register_modules(unet=unet, noise_scheduler=noise_scheduler)

    def __call__(self, generator=None, torch_device=None):
        # only fall back to auto-detection when no device was passed in
        if torch_device is None:
            torch_device = "cuda" if torch.cuda.is_available() else "cpu"

        self.unet.to(torch_device)

        # 1. Sample gaussian noise x_T
        image = self.noise_scheduler.sample_noise(
            (1, self.unet.in_channels, self.unet.resolution, self.unet.resolution),
            device=torch_device,
            generator=generator,
        )
        for t in tqdm.tqdm(reversed(range(len(self.noise_scheduler))), total=len(self.noise_scheduler)):
            # i) define coefficients for time step t
            clip_image_coeff = 1 / torch.sqrt(self.noise_scheduler.get_alpha_prod(t))
            clip_noise_coeff = torch.sqrt(1 / self.noise_scheduler.get_alpha_prod(t) - 1)
            image_coeff = (1 - self.noise_scheduler.get_alpha_prod(t - 1)) * torch.sqrt(self.noise_scheduler.get_alpha(t)) / (1 - self.noise_scheduler.get_alpha_prod(t))
            clip_coeff = torch.sqrt(self.noise_scheduler.get_alpha_prod(t - 1)) * self.noise_scheduler.get_beta(t) / (1 - self.noise_scheduler.get_alpha_prod(t))

            # ii) predict noise residual
            with torch.no_grad():
                noise_residual = self.unet(image, t)

            # iii) compute predicted image from residual
            # See 2nd formula at https://github.com/hojonathanho/diffusion/issues/5#issue-896554416 for comparison
            pred_mean = clip_image_coeff * image - clip_noise_coeff * noise_residual
            pred_mean = torch.clamp(pred_mean, -1, 1)
            prev_image = clip_coeff * pred_mean + image_coeff * image

            # iv) sample variance
            prev_variance = self.noise_scheduler.sample_variance(t, prev_image.shape, device=torch_device, generator=generator)

            # v) sample x_{t-1} ~ N(prev_image, prev_variance)
            sampled_prev_image = prev_image + prev_variance
            image = sampled_prev_image

        return image
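
For reference, steps i) to iii) above implement ancestral sampling as in Ho et al. (2020): the network's noise prediction is first turned into an estimate of the clean image, which is then combined with the current image to give the posterior mean over x_{t-1} (Eq. 7 of the paper). `clip_image_coeff` and `clip_noise_coeff` are the two factors in the first expression; `clip_coeff` and `image_coeff` are the two factors in the second:

```latex
\hat{x}_0 = \frac{x_t - \sqrt{1 - \bar{\alpha}_t}\,\epsilon_\theta(x_t, t)}{\sqrt{\bar{\alpha}_t}},
\qquad
\tilde{\mu}_t(x_t, \hat{x}_0)
  = \frac{\sqrt{\bar{\alpha}_{t-1}}\,\beta_t}{1 - \bar{\alpha}_t}\,\hat{x}_0
  + \frac{\sqrt{\alpha_t}\,\left(1 - \bar{\alpha}_{t-1}\right)}{1 - \bar{\alpha}_t}\,x_t
```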
scheduler_config.json
ADDED
@@ -0,0 +1,11 @@
{
  "_class_name": "DDPMScheduler",
  "_diffusers_version": "0.1.1",
  "beta_end": 0.02,
  "beta_schedule": "linear",
  "beta_start": 0.0001,
  "clip_sample": true,
  "num_train_timesteps": 1000,
  "trained_betas": null,
  "variance_type": "fixed_large"
}
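
These are the standard DDPM training settings: a linear beta schedule from 0.0001 to 0.02 over 1000 timesteps, with sample clipping enabled. As a minimal sketch, assuming the `DDPMScheduler.from_pretrained` and `set_timesteps` APIs of current `diffusers` releases, the scheduler can be loaded and inspected on its own:

```python
from diffusers import DDPMScheduler

# loads scheduler_config.json from the repo root
scheduler = DDPMScheduler.from_pretrained("google/ddpm-cifar10-32")

print(scheduler.config.num_train_timesteps)  # 1000
print(scheduler.config.beta_schedule)        # "linear"

# restrict to fewer steps for faster, approximate sampling
scheduler.set_timesteps(50)
print(len(scheduler.timesteps))  # 50
```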