Spaces:

legoandmars
/

glide-inpainting

Runtime error

App Files Files Community

legoandmars committed on Feb 17, 2022

Commit

a57fa2f

1 Parent(s): 0626a14

switch to inpainting

Browse files

Files changed (14) hide show

.gitignore +3 -0
LICENSE +21 -0
README.md +4 -4
app.py +121 -56
grass.png +0 -0
grass_with_mask.png +0 -0
notebooks/clip_guided.ipynb +13 -1
notebooks/inpaint.ipynb +13 -1
notebooks/text2im.ipynb +13 -1
ocean.jpg +0 -0
ocean_with_mask.png +0 -0
requirements.txt +2 -4
server.py +0 -175
setup.py +15 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__/
+*.egg-info/
+.DS_Store

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2021 OpenAI
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: Glide Text2im
-emoji: 📊
-colorFrom: purple
-colorTo: gray
 sdk: gradio
 app_file: app.py
 pinned: false

 ---
+title: GLIDE_Inpaint
+emoji: 💻
+colorFrom: green
+colorTo: purple
 sdk: gradio
 app_file: app.py
 pinned: false

app.py CHANGED Viewed

@@ -1,14 +1,18 @@
-import os
-os.system('pip install -e .')
-import gradio as gr
-import base64
-from io import BytesIO
-# from fastapi import FastAPI
 from PIL import Image
 import torch as th
 from glide_text2im.download import load_checkpoint
 from glide_text2im.model_creation import (
@@ -17,8 +21,12 @@ from glide_text2im.model_creation import (
     model_and_diffusion_defaults_upsampler
 )
-# print("Loading models...")
-# app = FastAPI()
 # This notebook supports both CPU and GPU.
 # On CPU, generating one sample may take on the order of 20 minutes.
@@ -29,6 +37,7 @@ device = th.device('cpu' if not has_cuda else 'cuda')
 # Create base model.
 options = model_and_diffusion_defaults()
 options['use_fp16'] = has_cuda
 options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
 model, diffusion = create_model_and_diffusion(**options)
@@ -36,11 +45,12 @@ model.eval()
 if has_cuda:
     model.convert_to_fp16()
 model.to(device)
-model.load_state_dict(load_checkpoint('base', device))
 print('total base parameters', sum(x.numel() for x in model.parameters()))
 # Create upsampler model.
 options_up = model_and_diffusion_defaults_upsampler()
 options_up['use_fp16'] = has_cuda
 options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
 model_up, diffusion_up = create_model_and_diffusion(**options_up)
@@ -48,20 +58,18 @@ model_up.eval()
 if has_cuda:
     model_up.convert_to_fp16()
 model_up.to(device)
-model_up.load_state_dict(load_checkpoint('upsample', device))
 print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
-def get_images(batch: th.Tensor):
-    """ Display a batch of images inline. """
-    scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
-    reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
-    return Image.fromarray(reshaped.numpy())
-# Create a classifier-free guidance sampling function
-guidance_scale = 3.0
 def model_fn(x_t, ts, **kwargs):
     half = x_t[: len(x_t) // 2]
     combined = th.cat([half, half], dim=0)
@@ -72,19 +80,69 @@ def model_fn(x_t, ts, **kwargs):
     eps = th.cat([half_eps, half_eps], dim=0)
     return th.cat([eps, rest], dim=1)
-# @app.get("/")
-def read_root():
-    return {"glide!"}
-# @app.get("/{generate}")
-def sample(prompt):
-    # Sampling parameters
-    batch_size = 1
-    # Tune this parameter to control the sharpness of 256x256 images.
-    # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-    upsample_temp = 0.997
     ##############################
     # Sample from the base model #
@@ -103,6 +161,7 @@ def sample(prompt):
     )
     # Pack the tokens together into model kwargs.
     model_kwargs = dict(
         tokens=th.tensor(
             [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
@@ -112,6 +171,10 @@ def sample(prompt):
             dtype=th.bool,
             device=device,
         ),
     )
     # Sample from the base model.
@@ -124,10 +187,10 @@ def sample(prompt):
         progress=True,
         model_kwargs=model_kwargs,
         cond_fn=None,
     )[:batch_size]
     model.del_cache()
     ##############################
     # Upsample the 64x64 samples #
     ##############################
@@ -151,12 +214,16 @@ def sample(prompt):
             dtype=th.bool,
             device=device,
         ),
     )
     # Sample from the base model.
     model_up.del_cache()
     up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
-    up_samples = diffusion_up.ddim_sample_loop(
         model_up,
         up_shape,
         noise=th.randn(up_shape, device=device) * upsample_temp,
@@ -165,32 +232,30 @@ def sample(prompt):
         progress=True,
         model_kwargs=model_kwargs,
         cond_fn=None,
     )[:batch_size]
     model_up.del_cache()
-    # Show the output
-    image = get_images(up_samples)
-    # image = to_base64(image)
-    # return {"image": image}
-    return image
-def to_base64(pil_image):
-    buffered = BytesIO()
-    pil_image.save(buffered, format="JPEG")
-    return base64.b64encode(buffered.getvalue())
-title = "Interactive demo: glide-text2im"
-description = "Demo for OpenAI's GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models."
-article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10741'>GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> | <a href='https://github.com/openai/glide-text2im/'>Official Repo</a></p>"
-examples =["an oil painting of a corgi"]
-iface = gr.Interface(fn=sample,
-                     inputs=gr.inputs.Textbox(label='What would you like to see?'),
-                     outputs=gr.outputs.Image(type="pil", label="Model input + completions"),
-                     title=title,
-                     description=description,
-                     article=article,
-                     examples=examples,
                      enable_queue=True)
-iface.launch(debug=True)

+import subprocess
+subprocess.run('pip install -e .', shell=True)
+print("Installed the repo!")
+# GLIDE imports
+from typing import Tuple
+from IPython.display import display
 from PIL import Image
+import PIL
+import PIL.ImageOps
+import numpy as np
 import torch as th
+import torch.nn.functional as F
 from glide_text2im.download import load_checkpoint
 from glide_text2im.model_creation import (
     model_and_diffusion_defaults_upsampler
 )
+# gradio app imports
+import gradio as gr
+from torchvision.transforms import ToTensor, ToPILImage
+image_to_tensor = ToTensor()
+tensor_to_image = ToPILImage()
 # This notebook supports both CPU and GPU.
 # On CPU, generating one sample may take on the order of 20 minutes.
 # Create base model.
 options = model_and_diffusion_defaults()
+options['inpaint'] = True
 options['use_fp16'] = has_cuda
 options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
 model, diffusion = create_model_and_diffusion(**options)
 if has_cuda:
     model.convert_to_fp16()
 model.to(device)
+model.load_state_dict(load_checkpoint('base-inpaint', device))
 print('total base parameters', sum(x.numel() for x in model.parameters()))
 # Create upsampler model.
 options_up = model_and_diffusion_defaults_upsampler()
+options_up['inpaint'] = True
 options_up['use_fp16'] = has_cuda
 options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
 model_up, diffusion_up = create_model_and_diffusion(**options_up)
 if has_cuda:
     model_up.convert_to_fp16()
 model_up.to(device)
+model_up.load_state_dict(load_checkpoint('upsample-inpaint', device))
 print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
+# Sampling parameters
+batch_size = 1
+guidance_scale = 5.0
+# Tune this parameter to control the sharpness of 256x256 images.
+# A value of 1.0 is sharper, but sometimes results in grainy artifacts.
+upsample_temp = 0.997
+# Create a classifier-free guidance sampling function
 def model_fn(x_t, ts, **kwargs):
     half = x_t[: len(x_t) // 2]
     combined = th.cat([half, half], dim=0)
     eps = th.cat([half_eps, half_eps], dim=0)
     return th.cat([eps, rest], dim=1)
+def denoised_fn(x_start):
+    """Overwrite the known region of a predicted x_start with the source image.
+
+    Reads ``inpaint_mask`` and ``inpaint_image`` from the module-level
+    ``model_kwargs``. Where the mask is 1 the stored inpainting image is
+    used; where it is 0 the model's own prediction is kept.
+    """
+    mask = model_kwargs['inpaint_mask']
+    known = model_kwargs['inpaint_image']
+    return x_start * (1 - mask) + known * mask
+def show_images(batch: th.Tensor):
+    """Tile a batch of images left to right and return them as one PIL image.
+
+    ``batch`` is indexed as (N, 3, H, W); its values are mapped from
+    [-1, 1] to the 0..255 byte range before conversion.
+    """
+    height = batch.shape[2]
+    pixels = (batch + 1) * 127.5
+    pixels = pixels.round().clamp(0, 255).to(th.uint8).cpu()
+    # (N, C, H, W) -> (H, N, W, C), then fold N into the width axis.
+    strip = pixels.permute(2, 0, 3, 1).reshape([height, -1, 3])
+    return Image.fromarray(strip.numpy())
+def read_image(path: str, size: int = 256) -> th.Tensor:
+    """Load an image file as a normalized RGB tensor.
+
+    Args:
+        path: Location of the image file to open.
+        size: Edge length, in pixels, of the square the image is resized to.
+
+    Returns:
+        A single float tensor of shape (1, 3, size, size) scaled to [-1, 1].
+    """
+    # Pillow opens files lazily; the context manager releases the handle
+    # once the pixel data has been converted.
+    with Image.open(path) as src:
+        pil_img = src.convert('RGB')
+    pil_img = pil_img.resize((size, size), resample=Image.BICUBIC)
+    img = np.array(pil_img)
+    # (H, W, C) uint8 -> (1, C, H, W) float, mapping 0..255 onto -1..1.
+    return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 127.5 - 1
+def read_mask(path: str, size: int = 256) -> th.Tensor:
+    """Load the alpha channel of an image file as a single-channel mask tensor.
+
+    Args:
+        path: Location of the image file to open.
+        size: Edge length, in pixels, of the square the mask is resized to.
+
+    Returns:
+        A single float tensor of shape (1, 1, size, size) with values in
+        [0, 1]. Bicubic resizing can produce intermediate values, so the
+        result is not guaranteed to be strictly binary.
+    """
+    # The context manager releases the file handle once the alpha channel
+    # (a mode 'L' image) has been extracted.
+    with PIL.Image.open(path) as src:
+        pil_img = src.convert('RGBA').getchannel('A')
+    pil_img = pil_img.resize((size, size), resample=PIL.Image.BICUBIC)
+    # Add a trailing channel axis so the layout matches an (H, W, C) image.
+    img = np.array(pil_img)[..., np.newaxis]
+    return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 255.0
+def pil_to_numpy(pil_img: Image.Image) -> th.Tensor:
+    """Convert a PIL image to a normalized, batched torch tensor.
+
+    Despite the name, the result is a torch tensor rather than a NumPy array.
+
+    Args:
+        pil_img: Image whose array form has three axes (H, W, C); the
+            callers in this file pass RGB-converted images.
+
+    Returns:
+        A single float tensor of shape (1, C, H, W) scaled to [-1, 1].
+    """
+    img = np.array(pil_img)
+    # (H, W, C) uint8 -> (1, C, H, W) float, mapping 0..255 onto -1..1.
+    return th.from_numpy(img)[None].permute(0, 3, 1, 2).float() / 127.5 - 1
+model_kwargs = dict()
+def inpaint(input_img, input_img_with_mask, prompt):
+    print(prompt)
+    # Save as png for later mask detection :)
+    input_img_256 = input_img.convert('RGB').resize((256, 256), resample=Image.BICUBIC)
+    input_img_64 = input_img.convert('RGB').resize((64, 64), resample=Image.BICUBIC)
+    input_img_with_mask_64 = input_img.convert('RGBA').getchannel('A').resize((64, 64), resample=Image.BICUBIC)
+    # Source image we are inpainting
+    source_image_256 = pil_to_numpy(input_img_256)
+    source_image_64 = pil_to_numpy(input_img_64)
+    # Since gradio doesn't supply which pixels were drawn, we need to find it ourselves!
+    # Assuming that all black pixels are meant for inpainting.
+    # input_img_with_mask_64 = input_img_with_mask.convert('L').resize((64, 64), resample=Image.BICUBIC)
+    gray_scale_source_image = image_to_tensor(input_img_with_mask_64)
+    source_mask_64 = (gray_scale_source_image!=0).float()
+    source_mask_64_img = tensor_to_image(source_mask_64)
+    # The mask should always be a boolean 64x64 mask, and then we
+    # can upsample it for the second stage.
+    source_mask_64 = source_mask_64.unsqueeze(0)
+    source_mask_256 = F.interpolate(source_mask_64, (256, 256), mode='nearest')
     ##############################
     # Sample from the base model #
     )
     # Pack the tokens together into model kwargs.
+    global model_kwargs
     model_kwargs = dict(
         tokens=th.tensor(
             [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
             dtype=th.bool,
             device=device,
         ),
+        # Masked inpainting image
+        inpaint_image=(source_image_64 * source_mask_64).repeat(full_batch_size, 1, 1, 1).to(device),
+        inpaint_mask=source_mask_64.repeat(full_batch_size, 1, 1, 1).to(device),
     )
     # Sample from the base model.
         progress=True,
         model_kwargs=model_kwargs,
         cond_fn=None,
+        denoised_fn=denoised_fn,
     )[:batch_size]
     model.del_cache()
     ##############################
     # Upsample the 64x64 samples #
     ##############################
             dtype=th.bool,
             device=device,
         ),
+        # Masked inpainting image.
+        inpaint_image=(source_image_256 * source_mask_256).repeat(batch_size, 1, 1, 1).to(device),
+        inpaint_mask=source_mask_256.repeat(batch_size, 1, 1, 1).to(device),
     )
     # Sample from the base model.
     model_up.del_cache()
     up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
+    up_samples = diffusion_up.p_sample_loop(
         model_up,
         up_shape,
         noise=th.randn(up_shape, device=device) * upsample_temp,
         progress=True,
         model_kwargs=model_kwargs,
         cond_fn=None,
+        denoised_fn=denoised_fn,
     )[:batch_size]
     model_up.del_cache()
+    return source_mask_64_img, show_images(up_samples)
+gradio_inputs = [gr.inputs.Image(type='pil',
+                                 label="Input Image"),
+                 gr.inputs.Image(type='pil',
+                                 label="Input Image With Mask"),
+                 gr.inputs.Textbox(label='Conditional Text to Inpaint')]
+# gradio_outputs = [gr.outputs.Image(label='Auto-Detected Mask (From drawn black pixels)')]
+gradio_outputs = [gr.outputs.Image(label='Auto-Detected Mask (From drawn black pixels)'),
+                 gr.outputs.Image(label='Inpainted Image')]
+examples = [['grass.png', 'grass_with_mask.png', 'a corgi in a field']]
+title = "GLIDE Inpaint"
+description = "[WARNING: Queue times may take 4-6 minutes per person if there's no GPU! If there is a GPU, it'll take around 60 seconds] Using GLIDE to inpaint black regions of an input image! Instructions: 1) For the 'Input Image', upload an image. 2) For the 'Input Image with Mask', draw a black-colored mask (either manually with something like Paint, or by using gradio's built-in image editor & add a black-colored shape) IT MUST BE BLACK COLOR, but doesn't have to be rectangular! This is because it auto-detects the mask based on 0 (black) pixel values! 3) For the Conditional Text, type something you'd like to see the black region get filled in with :)"
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.10741' target='_blank'>GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models</a> | <a href='https://github.com/openai/glide-text2im' target='_blank'>Github Repo</a> | <img src='https://visitor-badge.glitch.me/badge?page_id=epoching_glide_inpaint' alt='visitor badge'></p>"
+iface = gr.Interface(fn=inpaint, inputs=gradio_inputs,
+                     outputs=gradio_outputs,
+                     examples=examples, title=title,
+                     description=description, article=article,
                      enable_queue=True)
+iface.launch()

grass.png ADDED Viewed

grass_with_mask.png ADDED Viewed

notebooks/clip_guided.ipynb CHANGED Viewed

@@ -1,5 +1,16 @@
 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": null,
@@ -227,7 +238,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run this line in Colab to install the package if it is\n",
+    "# not already installed.\n",
+    "!pip install git+https://github.com/openai/glide-text2im"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 2

notebooks/inpaint.ipynb CHANGED Viewed

@@ -1,5 +1,16 @@
 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": null,
@@ -283,7 +294,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run this line in Colab to install the package if it is\n",
+    "# not already installed.\n",
+    "!pip install git+https://github.com/openai/glide-text2im"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 2

notebooks/text2im.ipynb CHANGED Viewed

@@ -1,5 +1,16 @@
 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": null,
@@ -232,7 +243,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
-  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run this line in Colab to install the package if it is\n",
+    "# not already installed.\n",
+    "!pip install git+https://github.com/openai/glide-text2im"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.3"
+  },
+  "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 2

ocean.jpg ADDED Viewed

ocean_with_mask.png ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,4 +1,2 @@
-git+https://github.com/openai/glide-text2im.git
-fastapi
-uvicorn
-regex


1	+ gradio
2	+ torchvision

server.py DELETED Viewed

@@ -1,175 +0,0 @@
-import base64
-from io import BytesIO
-from fastapi import FastAPI
-from PIL import Image
-import torch as th
-from glide_text2im.download import load_checkpoint
-from glide_text2im.model_creation import (
-    create_model_and_diffusion,
-    model_and_diffusion_defaults,
-    model_and_diffusion_defaults_upsampler
-)
-print("Loading models...")
-app = FastAPI()
-# This notebook supports both CPU and GPU.
-# On CPU, generating one sample may take on the order of 20 minutes.
-# On a GPU, it should be under a minute.
-has_cuda = th.cuda.is_available()
-device = th.device('cpu' if not has_cuda else 'cuda')
-# Create base model.
-options = model_and_diffusion_defaults()
-options['use_fp16'] = has_cuda
-options['timestep_respacing'] = '100' # use 100 diffusion steps for fast sampling
-model, diffusion = create_model_and_diffusion(**options)
-model.eval()
-if has_cuda:
-    model.convert_to_fp16()
-model.to(device)
-model.load_state_dict(load_checkpoint('base', device))
-print('total base parameters', sum(x.numel() for x in model.parameters()))
-# Create upsampler model.
-options_up = model_and_diffusion_defaults_upsampler()
-options_up['use_fp16'] = has_cuda
-options_up['timestep_respacing'] = 'fast27' # use 27 diffusion steps for very fast sampling
-model_up, diffusion_up = create_model_and_diffusion(**options_up)
-model_up.eval()
-if has_cuda:
-    model_up.convert_to_fp16()
-model_up.to(device)
-model_up.load_state_dict(load_checkpoint('upsample', device))
-print('total upsampler parameters', sum(x.numel() for x in model_up.parameters()))
-def get_images(batch: th.Tensor):
-    """ Display a batch of images inline. """
-    scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
-    reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])
-    Image.fromarray(reshaped.numpy())
-# Create a classifier-free guidance sampling function
-guidance_scale = 3.0
-def model_fn(x_t, ts, **kwargs):
-    half = x_t[: len(x_t) // 2]
-    combined = th.cat([half, half], dim=0)
-    model_out = model(combined, ts, **kwargs)
-    eps, rest = model_out[:, :3], model_out[:, 3:]
-    cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
-    half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
-    eps = th.cat([half_eps, half_eps], dim=0)
-    return th.cat([eps, rest], dim=1)
-@app.get("/")
-def read_root():
-    return {"glide!"}
-@app.get("/{generate}")
-def sample(prompt):
-    # Sampling parameters
-    batch_size = 1
-    # Tune this parameter to control the sharpness of 256x256 images.
-    # A value of 1.0 is sharper, but sometimes results in grainy artifacts.
-    upsample_temp = 0.997
-    ##############################
-    # Sample from the base model #
-    ##############################
-    # Create the text tokens to feed to the model.
-    tokens = model.tokenizer.encode(prompt)
-    tokens, mask = model.tokenizer.padded_tokens_and_mask(
-        tokens, options['text_ctx']
-    )
-    # Create the classifier-free guidance tokens (empty)
-    full_batch_size = batch_size * 2
-    uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
-        [], options['text_ctx']
-    )
-    # Pack the tokens together into model kwargs.
-    model_kwargs = dict(
-        tokens=th.tensor(
-            [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
-        ),
-        mask=th.tensor(
-            [mask] * batch_size + [uncond_mask] * batch_size,
-            dtype=th.bool,
-            device=device,
-        ),
-    )
-    # Sample from the base model.
-    model.del_cache()
-    samples = diffusion.p_sample_loop(
-        model_fn,
-        (full_batch_size, 3, options["image_size"], options["image_size"]),
-        device=device,
-        clip_denoised=True,
-        progress=True,
-        model_kwargs=model_kwargs,
-        cond_fn=None,
-    )[:batch_size]
-    model.del_cache()
-    ##############################
-    # Upsample the 64x64 samples #
-    ##############################
-    tokens = model_up.tokenizer.encode(prompt)
-    tokens, mask = model_up.tokenizer.padded_tokens_and_mask(
-        tokens, options_up['text_ctx']
-    )
-    # Create the model conditioning dict.
-    model_kwargs = dict(
-        # Low-res image to upsample.
-        low_res=((samples+1)*127.5).round()/127.5 - 1,
-        # Text tokens
-        tokens=th.tensor(
-            [tokens] * batch_size, device=device
-        ),
-        mask=th.tensor(
-            [mask] * batch_size,
-            dtype=th.bool,
-            device=device,
-        ),
-    )
-    # Sample from the base model.
-    model_up.del_cache()
-    up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
-    up_samples = diffusion_up.ddim_sample_loop(
-        model_up,
-        up_shape,
-        noise=th.randn(up_shape, device=device) * upsample_temp,
-        device=device,
-        clip_denoised=True,
-        progress=True,
-        model_kwargs=model_kwargs,
-        cond_fn=None,
-    )[:batch_size]
-    model_up.del_cache()
-    # Show the output
-    image = get_images(up_samples)
-    image = to_base64(image)
-    return {"image": image}
-def to_base64(pil_image):
-    buffered = BytesIO()
-    pil_image.save(buffered, format="JPEG")
-    return base64.b64encode(buffered.getvalue())

setup.py CHANGED Viewed

@@ -2,7 +2,19 @@ from setuptools import setup
 setup(
     name="glide-text2im",
-    packages=["glide_text2im"],
     install_requires=[
         "Pillow",
         "attrs",
@@ -10,6 +22,8 @@ setup(
         "filelock",
         "requests",
         "tqdm",
     ],
     author="OpenAI",
 )

 setup(
     name="glide-text2im",
+    packages=[
+        "glide_text2im",
+        "glide_text2im.clip",
+        "glide_text2im.tokenizer",
+    ],
+    package_data={
+        "glide_text2im.tokenizer": [
+            "bpe_simple_vocab_16e6.txt.gz",
+            "encoder.json.gz",
+            "vocab.bpe.gz",
+        ],
+        "glide_text2im.clip": ["config.yaml"],
+    },
     install_requires=[
         "Pillow",
         "attrs",
         "filelock",
         "requests",
         "tqdm",
+        "ftfy",
+        "regex",
     ],
     author="OpenAI",
 )