legoandmars committed
Commit a57fa2f · 1 Parent(s): 0626a14

switch to inpainting
.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__/
+ *.egg-info/
+ .DS_Store
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+ 
+ Copyright (c) 2021 OpenAI
+ 
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ 
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
- title: Glide Text2im
- emoji: 📊
- colorFrom: purple
- colorTo: gray
+ title: GLIDE_Inpaint
+ emoji: 💻
+ colorFrom: green
+ colorTo: purple
  sdk: gradio
  app_file: app.py
  pinned: false
app.py CHANGED
@@ -1,14 +1,18 @@
+ import subprocess
+ subprocess.run('pip install -e .', shell=True)
 
- import os
- os.system('pip install -e .')
- import gradio as gr
+ print("Installed the repo!")
 
- import base64
- from io import BytesIO
- # from fastapi import FastAPI
+ # GLIDE imports
+ from IPython.display import display
  from PIL import Image
+ import PIL
+ import PIL.ImageOps
+ import numpy as np
  import torch as th
+ import torch.nn.functional as F
 
  from glide_text2im.download import load_checkpoint
  from glide_text2im.model_creation import (
@@ -17,8 +21,12 @@ from glide_text2im.model_creation import (
      model_and_diffusion_defaults_upsampler
  )
 
- # print("Loading models...")
- # app = FastAPI()
+ # gradio app imports
+ import gradio as gr
+ 
+ from torchvision.transforms import ToTensor, ToPILImage
+ image_to_tensor = ToTensor()
+ tensor_to_image = ToPILImage()
 
  # This notebook supports both CPU and GPU.
  # On CPU, generating one sample may take on the order of 20 minutes.
@@ -29,6 +37,7 @@ device = th.device('cpu' if not has_cuda else 'cuda')
 
  # Create base model.
  options = model_and_diffusion_defaults()
+ options['inpaint'] = True
  options['use_fp16'] = has_cuda
  options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
  model, diffusion = create_model_and_diffusion(**options)
@@ -36,11 +45,12 @@ model.eval()
  if has_cuda:
      model.convert_to_fp16()
  model.to(device)
- model.load_state_dict(load_checkpoint('base', device))
+ model.load_state_dict(load_checkpoint('base-inpaint', device))
  print('total base parameters', sum(x.numel() for x in model.parameters()))
 
  # Create upsampler model.
  options_up = model_and_diffusion_defaults_upsampler()
+ options_up['inpaint'] = True
  options_up['use_fp16'] = has_cuda
  options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
  model_up, diffusion_up = create_model_and_diffusion(**options_up)
@@ -48,20 +58,18 @@ model_up.eval()
  if has_cuda:
      model_up.convert_to_fp16()
  model_up.to(device)
- model_up.load_state_dict(load_checkpoint('upsample', device))
+ model_up.load_state_dict(load_checkpoint('upsample-inpaint', device))
  print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
 
- def get_images(batch: th.Tensor):
-     """ Display a batch of images inline. """
-     scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
-     reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
-     return Image.fromarray(reshaped.numpy())
- 
- # Create a classifier-free guidance sampling function
- guidance_scale = 3.0
+ # Sampling parameters
+ batch_size = 1
+ guidance_scale = 5.0
 
+ # Tune this parameter to control the sharpness of 256x256 images.
+ # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
+ upsample_temp = 0.997
 
+ # Create a classifier-free guidance sampling function
  def model_fn(x_t, ts, **kwargs):
      half = x_t[: len(x_t) // 2]
      combined = th.cat([half, half], dim=0)
@@ -72,19 +80,69 @@ def model_fn(x_t, ts, **kwargs):
      eps = th.cat([half_eps, half_eps], dim=0)
      return th.cat([eps, rest], dim=1)
 
- # @app.get("/")
- def read_root():
-     return {"glide!"}
- 
- # @app.get("/{generate}")
- def sample(prompt):
-     # Sampling parameters
-     batch_size = 1
- 
-     # Tune this parameter to control the sharpness of 256x256 images.
-     # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-     upsample_temp = 0.997
+ def denoised_fn(x_start):
+     # Force the model to have the exact right x_start predictions
+     # for the part of the image which is known.
+     return (
+         x_start * (1 - model_kwargs['inpaint_mask'])
+         + model_kwargs['inpaint_image'] * model_kwargs['inpaint_mask']
+     )
+ 
+ def show_images(batch: th.Tensor):
+     """ Turn a batch of images into a single PIL image. """
+     scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(th.uint8).cpu()
+     reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
+     return Image.fromarray(reshaped.numpy())
+ 
+ def read_image(path: str, size: int = 256) -> th.Tensor:
+     pil_img = Image.open(path).convert('RGB')
+     pil_img = pil_img.resize((size, size), resample=Image.BICUBIC)
+     img = np.array(pil_img)
+     return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 127.5 - 1
+ 
+ def read_mask(path: str, size: int = 256) -> th.Tensor:
+     # Use the alpha channel as the mask (mode 'L').
+     pil_img = PIL.Image.open(path).convert('RGBA').getchannel('A')
+     # pil_img = PIL.ImageOps.invert(pil_img)
+     pil_img = pil_img.resize((size, size), resample=PIL.Image.BICUBIC)
+     img = np.array(pil_img)[..., np.newaxis]
+     return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 255.0
+ 
+ def pil_to_numpy(pil_img: Image.Image) -> th.Tensor:
+     img = np.array(pil_img)
+     return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 127.5 - 1
+ 
+ model_kwargs = dict()
+ 
+ def inpaint(input_img, input_img_with_mask, prompt):
+     print(prompt)
+ 
+     # Resize copies of the source for the base (64x64) and upsampler (256x256) stages.
+     input_img_256 = input_img.convert('RGB').resize((256, 256), resample=Image.BICUBIC)
+     input_img_64 = input_img.convert('RGB').resize((64, 64), resample=Image.BICUBIC)
+ 
+     input_img_with_mask_64 = input_img_with_mask.convert('L').resize((64, 64), resample=Image.BICUBIC)
+ 
+     # Source image we are inpainting
+     source_image_256 = pil_to_numpy(input_img_256)
+     source_image_64 = pil_to_numpy(input_img_64)
+ 
+     # Since gradio doesn't supply which pixels were drawn, we find them
+     # ourselves, assuming that all black pixels are meant for inpainting.
+     gray_scale_source_image = image_to_tensor(input_img_with_mask_64)
+     source_mask_64 = (gray_scale_source_image != 0).float()
+     source_mask_64_img = tensor_to_image(source_mask_64)
+ 
+     # The mask should always be a boolean 64x64 mask, and then we
+     # can upsample it for the second stage.
+     source_mask_64 = source_mask_64.unsqueeze(0)
+     source_mask_256 = F.interpolate(source_mask_64, (256, 256), mode='nearest')
 
      ##############################
      # Sample from the base model #
@@ -103,6 +161,7 @@ def sample(prompt):
      )
 
      # Pack the tokens together into model kwargs.
+     global model_kwargs
      model_kwargs = dict(
          tokens=th.tensor(
              [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
@@ -112,6 +171,10 @@ def sample(prompt):
              dtype=th.bool,
              device=device,
          ),
+ 
+         # Masked inpainting image
+         inpaint_image=(source_image_64 * source_mask_64).repeat(full_batch_size, 1, 1, 1).to(device),
+         inpaint_mask=source_mask_64.repeat(full_batch_size, 1, 1, 1).to(device),
      )
 
      # Sample from the base model.
@@ -124,10 +187,10 @@ def sample(prompt):
          progress=True,
          model_kwargs=model_kwargs,
          cond_fn=None,
+         denoised_fn=denoised_fn,
      )[:batch_size]
      model.del_cache()
 
- 
      ##############################
      # Upsample the 64x64 samples #
      ##############################
@@ -151,12 +214,16 @@ def sample(prompt):
              dtype=th.bool,
              device=device,
          ),
+ 
+         # Masked inpainting image.
+         inpaint_image=(source_image_256 * source_mask_256).repeat(batch_size, 1, 1, 1).to(device),
+         inpaint_mask=source_mask_256.repeat(batch_size, 1, 1, 1).to(device),
      )
 
      # Sample from the upsampler model.
      model_up.del_cache()
      up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
-     up_samples = diffusion_up.ddim_sample_loop(
+     up_samples = diffusion_up.p_sample_loop(
          model_up,
          up_shape,
          noise=th.randn(up_shape, device=device) * upsample_temp,
@@ -165,32 +232,30 @@ def sample(prompt):
          progress=True,
          model_kwargs=model_kwargs,
          cond_fn=None,
+         denoised_fn=denoised_fn,
      )[:batch_size]
      model_up.del_cache()
 
-     # Show the output
-     image = get_images(up_samples)
-     # image = to_base64(image)
-     # return {"image": image}
-     return image
- 
- 
- def to_base64(pil_image):
-     buffered = BytesIO()
-     pil_image.save(buffered, format="JPEG")
-     return base64.b64encode(buffered.getvalue())
- 
- title = "Interactive demo: glide-text2im"
- description = "Demo for OpenAI's GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models."
- article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10741'>GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> | <a href='https://github.com/openai/glide-text2im/'>Official Repo</a></p>"
- examples =["an oil painting of a corgi"]
- 
- iface = gr.Interface(fn=sample,
-                      inputs=gr.inputs.Textbox(label='What would you like to see?'),
-                      outputs=gr.outputs.Image(type="pil", label="Model input + completions"),
-                      title=title,
-                      description=description,
-                      article=article,
-                      examples=examples,
+     return source_mask_64_img, show_images(up_samples)
+ 
+ gradio_inputs = [gr.inputs.Image(type='pil', label="Input Image"),
+                  gr.inputs.Image(type='pil', label="Input Image With Mask"),
+                  gr.inputs.Textbox(label='Conditional Text to Inpaint')]
+ 
+ gradio_outputs = [gr.outputs.Image(label='Auto-Detected Mask (From drawn black pixels)'),
+                   gr.outputs.Image(label='Inpainted Image')]
+ examples = [['grass.png', 'grass_with_mask.png', 'a corgi in a field']]
+ 
+ title = "GLIDE Inpaint"
+ description = "[WARNING: Queue times may reach 4-6 minutes per person if there's no GPU; with a GPU, it takes around 60 seconds.] Uses GLIDE to inpaint black regions of an input image. Instructions: 1) For 'Input Image', upload an image. 2) For 'Input Image With Mask', draw a black mask over the region to fill, either manually (e.g. in Paint) or with gradio's built-in image editor by adding a black shape. The mask MUST be black, but it doesn't have to be rectangular: it is auto-detected from 0-valued (black) pixels. 3) For the Conditional Text, type what you'd like the black region filled in with :)"
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10741' target='_blank'>GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> | <a href='https://github.com/openai/glide-text2im' target='_blank'>Github Repo</a> | <img src='https://visitor-badge.glitch.me/badge?page_id=epoching_glide_inpaint' alt='visitor badge'></p>"
+ iface = gr.Interface(fn=inpaint, inputs=gradio_inputs,
+                      outputs=gradio_outputs,
+                      examples=examples, title=title,
+                      description=description, article=article,
                       enable_queue=True)
+ iface.launch()
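
The heart of this switch is the `denoised_fn` hook added above: at every diffusion step the model's predicted x_start is overwritten with the known source pixels wherever the mask is 1, so only the black (mask = 0) region is actually generated. A minimal self-contained sketch of that projection on toy tensors (shapes and names here are illustrative, not the app's):

import torch as th

source = th.rand(1, 3, 64, 64) * 2 - 1         # known image, scaled to [-1, 1]
mask = (th.rand(1, 1, 64, 64) > 0.25).float()  # 1 = keep source, 0 = inpaint

def denoised_fn(x_start):
    # Keep the model's prediction only where mask == 0;
    # everywhere else, force the known source pixels.
    return x_start * (1 - mask) + source * mask

x_start_pred = th.rand(1, 3, 64, 64) * 2 - 1   # stand-in for a model prediction
projected = denoised_fn(x_start_pred)

# Known pixels survive exactly; masked-out pixels come from the model.
assert th.equal(projected * mask, source * mask)
assert th.equal(projected * (1 - mask), x_start_pred * (1 - mask))
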
grass.png ADDED
grass_with_mask.png ADDED
notebooks/clip_guided.ipynb CHANGED
@@ -1,5 +1,16 @@
  {
   "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# Run this line in Colab to install the package if it is\n",
+     "# not already installed.\n",
+     "!pip install git+https://github.com/openai/glide-text2im"
+    ]
+   },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -227,7 +238,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  }
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 2
notebooks/inpaint.ipynb CHANGED
@@ -1,5 +1,16 @@
  {
   "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# Run this line in Colab to install the package if it is\n",
+     "# not already installed.\n",
+     "!pip install git+https://github.com/openai/glide-text2im"
+    ]
+   },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -283,7 +294,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  }
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 2
notebooks/text2im.ipynb CHANGED
@@ -1,5 +1,16 @@
  {
   "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# Run this line in Colab to install the package if it is\n",
+     "# not already installed.\n",
+     "!pip install git+https://github.com/openai/glide-text2im"
+    ]
+   },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -232,7 +243,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  }
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 2
ocean.jpg ADDED
ocean_with_mask.png ADDED
requirements.txt CHANGED
@@ -1,4 +1,2 @@
- git+https://github.com/openai/glide-text2im.git
- fastapi
- uvicorn
- regex
+ gradio
+ torchvision
server.py DELETED
@@ -1,175 +0,0 @@
- import base64
- from io import BytesIO
- from fastapi import FastAPI
- 
- from PIL import Image
- import torch as th
- 
- from glide_text2im.download import load_checkpoint
- from glide_text2im.model_creation import (
-     create_model_and_diffusion,
-     model_and_diffusion_defaults,
-     model_and_diffusion_defaults_upsampler
- )
- 
- print("Loading models...")
- app = FastAPI()
- 
- # This notebook supports both CPU and GPU.
- # On CPU, generating one sample may take on the order of 20 minutes.
- # On a GPU, it should be under a minute.
- 
- has_cuda = th.cuda.is_available()
- device = th.device('cpu' if not has_cuda else 'cuda')
- 
- # Create base model.
- options = model_and_diffusion_defaults()
- options['use_fp16'] = has_cuda
- options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
- model, diffusion = create_model_and_diffusion(**options)
- model.eval()
- if has_cuda:
-     model.convert_to_fp16()
- model.to(device)
- model.load_state_dict(load_checkpoint('base', device))
- print('total base parameters', sum(x.numel() for x in model.parameters()))
- 
- # Create upsampler model.
- options_up = model_and_diffusion_defaults_upsampler()
- options_up['use_fp16'] = has_cuda
- options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
- model_up, diffusion_up = create_model_and_diffusion(**options_up)
- model_up.eval()
- if has_cuda:
-     model_up.convert_to_fp16()
- model_up.to(device)
- model_up.load_state_dict(load_checkpoint('upsample', device))
- print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
- 
- 
- def get_images(batch: th.Tensor):
-     """ Display a batch of images inline. """
-     scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
-     reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
-     Image.fromarray(reshaped.numpy())
- 
- 
- # Create a classifier-free guidance sampling function
- guidance_scale = 3.0
- 
- def model_fn(x_t, ts, **kwargs):
-     half = x_t[: len(x_t) // 2]
-     combined = th.cat([half, half], dim=0)
-     model_out = model(combined, ts, **kwargs)
-     eps, rest = model_out[:, :3], model_out[:, 3:]
-     cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
-     half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
-     eps = th.cat([half_eps, half_eps], dim=0)
-     return th.cat([eps, rest], dim=1)
- 
- 
- @app.get("/")
- def read_root():
-     return {"glide!"}
- 
- @app.get("/{generate}")
- def sample(prompt):
-     # Sampling parameters
-     batch_size = 1
- 
-     # Tune this parameter to control the sharpness of 256x256 images.
-     # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-     upsample_temp = 0.997
- 
-     ##############################
-     # Sample from the base model #
-     ##############################
- 
-     # Create the text tokens to feed to the model.
-     tokens = model.tokenizer.encode(prompt)
-     tokens, mask = model.tokenizer.padded_tokens_and_mask(
-         tokens, options['text_ctx']
-     )
- 
-     # Create the classifier-free guidance tokens (empty)
-     full_batch_size = batch_size * 2
-     uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
-         [], options['text_ctx']
-     )
- 
-     # Pack the tokens together into model kwargs.
-     model_kwargs = dict(
-         tokens=th.tensor(
-             [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
-         ),
-         mask=th.tensor(
-             [mask] * batch_size + [uncond_mask] * batch_size,
-             dtype=th.bool,
-             device=device,
-         ),
-     )
- 
-     # Sample from the base model.
-     model.del_cache()
-     samples = diffusion.p_sample_loop(
-         model_fn,
-         (full_batch_size, 3, options["image_size"], options["image_size"]),
-         device=device,
-         clip_denoised=True,
-         progress=True,
-         model_kwargs=model_kwargs,
-         cond_fn=None,
-     )[:batch_size]
-     model.del_cache()
- 
- 
-     ##############################
-     # Upsample the 64x64 samples #
-     ##############################
- 
-     tokens = model_up.tokenizer.encode(prompt)
-     tokens, mask = model_up.tokenizer.padded_tokens_and_mask(
-         tokens, options_up['text_ctx']
-     )
- 
-     # Create the model conditioning dict.
-     model_kwargs = dict(
-         # Low-res image to upsample.
-         low_res=((samples+1)*127.5).round()/127.5 - 1,
- 
-         # Text tokens
-         tokens=th.tensor(
-             [tokens] * batch_size, device=device
-         ),
-         mask=th.tensor(
-             [mask] * batch_size,
-             dtype=th.bool,
-             device=device,
-         ),
-     )
- 
-     # Sample from the base model.
-     model_up.del_cache()
-     up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
-     up_samples = diffusion_up.ddim_sample_loop(
-         model_up,
-         up_shape,
-         noise=th.randn(up_shape, device=device) * upsample_temp,
-         device=device,
-         clip_denoised=True,
-         progress=True,
-         model_kwargs=model_kwargs,
-         cond_fn=None,
-     )[:batch_size]
-     model_up.del_cache()
- 
-     # Show the output
-     image = get_images(up_samples)
-     image = to_base64(image)
-     return {"image": image}
- 
- 
- def to_base64(pil_image):
-     buffered = BytesIO()
-     pil_image.save(buffered, format="JPEG")
-     return base64.b64encode(buffered.getvalue())
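
Both this deleted server and the new app.py use the same classifier-free guidance trick in `model_fn`: the batch is doubled into a conditional half and an unconditional half, and the two noise predictions are blended with `guidance_scale`. A minimal sketch of just that blending step on toy tensors (shapes illustrative; 3.0 was the server's scale, while app.py now uses 5.0):

import torch as th

guidance_scale = 3.0

# Toy model output for a doubled batch of 1: channels 0-2 hold the
# eps prediction; rows are [conditional, unconditional].
model_out = th.randn(2, 6, 64, 64)

eps, rest = model_out[:, :3], model_out[:, 3:]
cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)

# Push the prediction away from the unconditional direction.
half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
eps = th.cat([half_eps, half_eps], dim=0)
guided = th.cat([eps, rest], dim=1)  # same shape as model_out
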
setup.py CHANGED
@@ -2,7 +2,19 @@ from setuptools import setup
 
  setup(
      name="glide-text2im",
-     packages=["glide_text2im"],
+     packages=[
+         "glide_text2im",
+         "glide_text2im.clip",
+         "glide_text2im.tokenizer",
+     ],
+     package_data={
+         "glide_text2im.tokenizer": [
+             "bpe_simple_vocab_16e6.txt.gz",
+             "encoder.json.gz",
+             "vocab.bpe.gz",
+         ],
+         "glide_text2im.clip": ["config.yaml"],
+     },
      install_requires=[
          "Pillow",
          "attrs",
@@ -10,6 +22,8 @@ setup(
          "filelock",
          "requests",
          "tqdm",
+         "ftfy",
+         "regex",
      ],
      author="OpenAI",
  )
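
The new `packages` and `package_data` entries make setuptools ship the tokenizer's gzipped BPE files and the CLIP `config.yaml` inside the installed package, so the `pip install -e .` at the top of app.py can find them at runtime. A hedged sketch of how such bundled files are typically located (the helper below is illustrative, not glide_text2im's actual loader):

import gzip
import os

def data_file(module_file: str, name: str) -> str:
    # Resolve a file that package_data placed alongside the module's code.
    return os.path.join(os.path.dirname(module_file), name)

# Illustrative usage, assuming the package is installed:
# import glide_text2im.tokenizer as tok
# path = data_file(tok.__file__, "bpe_simple_vocab_16e6.txt.gz")
# with gzip.open(path, "rt", encoding="utf-8") as f:
#     first_line = f.readline()
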