File size: 5,384 Bytes

a5407e7

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from tqdm.auto import tqdm\n",
    "\n",
    "from point_e.diffusion.configs import DIFFUSION_CONFIGS, diffusion_from_config\n",
    "from point_e.diffusion.sampler import PointCloudSampler\n",
    "from point_e.models.download import load_checkpoint\n",
    "from point_e.models.configs import MODEL_CONFIGS, model_from_config\n",
    "from point_e.util.plotting import plot_point_cloud"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "creating base model...\n",
      "creating upsample model...\n",
      "downloading base checkpoint...\n",
      "downloading upsampler checkpoint...\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "device = torch.device('cuda')\n",
    "\n",
    "print('creating base model...')\n",
    "base_name = 'base40M-textvec'\n",
    "base_model = model_from_config(MODEL_CONFIGS[base_name], device)\n",
    "base_model.eval()\n",
    "base_diffusion = diffusion_from_config(DIFFUSION_CONFIGS[base_name])\n",
    "\n",
    "print('creating upsample model...')\n",
    "upsampler_model = model_from_config(MODEL_CONFIGS['upsample'], device)\n",
    "upsampler_model.eval()\n",
    "upsampler_diffusion = diffusion_from_config(DIFFUSION_CONFIGS['upsample'])\n",
    "\n",
    "print('downloading base checkpoint...')\n",
    "base_model.load_state_dict(load_checkpoint(base_name, device))\n",
    "\n",
    "print('downloading upsampler checkpoint...')\n",
    "upsampler_model.load_state_dict(load_checkpoint('upsample', device))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "sampler = PointCloudSampler(\n",
    "    device=device,\n",
    "    models=[base_model, upsampler_model],\n",
    "    diffusions=[base_diffusion, upsampler_diffusion],\n",
    "    num_points=[1024, 4096 - 1024],\n",
    "    aux_channels=['R', 'G', 'B'],\n",
    "    guidance_scale=[3.0, 0.0],\n",
    "    model_kwargs_key_filter=('texts', ''), # Do not condition the upsampler at all\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d118bb96378d483cb4065eff8cc72f3e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Set a prompt to condition on.\n",
    "prompt = 'A bluebird mid-flight'\n",
    "\n",
    "# Produce a sample from the model.\n",
    "samples = None\n",
    "for x in tqdm(sampler.sample_batch_progressive(batch_size=1, model_kwargs=dict(texts=[prompt]))):\n",
    "    samples = x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pc = sampler.output_to_point_clouds(samples)[0]\n",
    "fig = plot_point_cloud(pc, grid_size=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "pc = sampler.output_to_point_clouds(samples)[0]\n",
    "with open('example_data/blue_bird.ply','wb') as f:\n",
    "    pc.write_ply(f)\n",
    "#pc.save('example_data/blue_bird.npz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PointCloud(coords=array([[ 0.2723148 ,  0.15381676,  0.00761994],\n",
      "       [-0.25651944,  0.18783031,  0.0212175 ],\n",
      "       [ 0.12061116,  0.05600073,  0.00907129],\n",
      "       ...,\n",
      "       [-0.00173135, -0.13978139,  0.00305715],\n",
      "       [-0.00342358,  0.15288702, -0.02027267],\n",
      "       [-0.116662  ,  0.14891137,  0.02732913]], dtype=float32), channels={'R': array([0.03137255, 0.02745098, 0.02745098, ..., 0.03529412, 0.03529412,\n",
      "       0.03529412], dtype=float32), 'G': array([0.00392157, 0.        , 0.        , ..., 0.01176471, 0.00784314,\n",
      "       0.01176471], dtype=float32), 'B': array([1., 1., 1., ..., 1., 1., 1.], dtype=float32)})\n"
     ]
    }
   ],
   "source": [
    "pc = sampler.output_to_point_clouds(samples)[0]\n",
    "print(pc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (TF GPU)",
   "language": "python",
   "name": "tf"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  },
  "vscode": {
   "interpreter": {
    "hash": "b270b0f43bc427bcab7703c037711644cc480aac7c1cc8d2940cfaf0b447ee2e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}