Upload folder using huggingface_hub
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
- .gitattributes +11 -0
- .gitignore +171 -0
- LICENSE +21 -0
- README.md +234 -12
- assets/cat_2x.gif +3 -0
- assets/clear2rainy_results.jpg +3 -0
- assets/day2night_results.jpg +3 -0
- assets/edge_to_image_results.jpg +3 -0
- assets/examples/bird.png +3 -0
- assets/examples/bird_canny.png +0 -0
- assets/examples/bird_canny_blue.png +0 -0
- assets/examples/circles_inference_input.png +0 -0
- assets/examples/circles_inference_output.png +0 -0
- assets/examples/clear2rainy_input.png +0 -0
- assets/examples/clear2rainy_output.png +0 -0
- assets/examples/day2night_input.png +0 -0
- assets/examples/day2night_output.png +0 -0
- assets/examples/my_horse2zebra_input.jpg +0 -0
- assets/examples/my_horse2zebra_output.jpg +0 -0
- assets/examples/night2day_input.png +0 -0
- assets/examples/night2day_output.png +0 -0
- assets/examples/rainy2clear_input.png +0 -0
- assets/examples/rainy2clear_output.png +0 -0
- assets/examples/sketch_input.png +0 -0
- assets/examples/sketch_output.png +0 -0
- assets/examples/training_evaluation.png +0 -0
- assets/examples/training_evaluation_unpaired.png +0 -0
- assets/examples/training_step_0.png +0 -0
- assets/examples/training_step_500.png +0 -0
- assets/examples/training_step_6000.png +0 -0
- assets/fish_2x.gif +3 -0
- assets/gen_variations.jpg +3 -0
- assets/method.jpg +0 -0
- assets/night2day_results.jpg +3 -0
- assets/rainy2clear.jpg +3 -0
- assets/teaser_results.jpg +3 -0
- docs/training_cyclegan_turbo.md +98 -0
- docs/training_pix2pix_turbo.md +118 -0
- environment.yaml +34 -0
- gradio_canny2image.py +78 -0
- gradio_sketch2image.py +382 -0
- python==3.9.8/Lib/site-packages/wheel/cli/tags.py +139 -0
- python==3.9.8/conda-meta/history +19 -0
- requirements.txt +28 -0
- scripts/download_fill50k.sh +5 -0
- scripts/download_horse2zebra.sh +5 -0
- src/cyclegan_turbo.py +254 -0
- src/image_prep.py +12 -0
- src/inference_paired.py +65 -0
- src/inference_unpaired.py +53 -0
.gitattributes
CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/cat_2x.gif filter=lfs diff=lfs merge=lfs -text
+assets/clear2rainy_results.jpg filter=lfs diff=lfs merge=lfs -text
+assets/day2night_results.jpg filter=lfs diff=lfs merge=lfs -text
+assets/edge_to_image_results.jpg filter=lfs diff=lfs merge=lfs -text
+assets/examples/bird.png filter=lfs diff=lfs merge=lfs -text
+assets/fish_2x.gif filter=lfs diff=lfs merge=lfs -text
+assets/gen_variations.jpg filter=lfs diff=lfs merge=lfs -text
+assets/night2day_results.jpg filter=lfs diff=lfs merge=lfs -text
+assets/rainy2clear.jpg filter=lfs diff=lfs merge=lfs -text
+assets/teaser_results.jpg filter=lfs diff=lfs merge=lfs -text
+triton-2.1.0-cp310-cp310-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,171 @@
single_step_translation/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
single_step_translation
gradio
checkpoints/
img2img-turbo-sketch
outputs/
outputs/bird.png
data
wandb
output/
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 img-to-img-turbo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,12 +1,234 @@
----
-title:
-
-
-
-
-
-
-
-
-
-
+---
+title: img2img-turbo
+app_file: gradio_sketch2image.py
+sdk: gradio
+sdk_version: 3.43.1
+---
+# img2img-turbo
+
+[**Paper**](https://arxiv.org/abs/2403.12036) | [**Sketch2Image Demo**](https://huggingface.co/spaces/gparmar/img2img-turbo-sketch)
+#### **Quick start:** [**Running Locally**](#getting-started) | [**Gradio (locally hosted)**](#gradio-demo) | [**Training**](#training-with-your-own-data)
+
+### Cat Sketching
+<p align="left">
+<img src="https://raw.githubusercontent.com/GaParmar/img2img-turbo/main/assets/cat_2x.gif" width="800" />
+</p>
+
+### Fish Sketching
+<p align="left">
+<img src="https://raw.githubusercontent.com/GaParmar/img2img-turbo/main/assets/fish_2x.gif" width="800" />
+</p>
+
+
+We propose a general method for adapting a single-step diffusion model, such as SD-Turbo, to new tasks and domains through adversarial learning. This enables us to leverage the internal knowledge of pre-trained diffusion models while achieving efficient inference (e.g., for 512x512 images, 0.29 seconds on an A6000 and 0.11 seconds on an A100).
+
+Our one-step conditional models **CycleGAN-Turbo** and **pix2pix-turbo** can perform various image-to-image translation tasks for both unpaired and paired settings. CycleGAN-Turbo outperforms existing GAN-based and diffusion-based methods, while pix2pix-turbo is on par with recent works such as ControlNet for Sketch2Photo and Edge2Image, but with one-step inference.
+
+[One-Step Image Translation with Text-to-Image Models](https://arxiv.org/abs/2403.12036)<br>
+[Gaurav Parmar](https://gauravparmar.com/), [Taesung Park](https://taesung.me/), [Srinivasa Narasimhan](https://www.cs.cmu.edu/~srinivas/), [Jun-Yan Zhu](https://github.com/junyanz/)<br>
+CMU and Adobe, arXiv 2403.12036
+
+<br>
+<div>
+<p align="center">
+<img src='assets/teaser_results.jpg' align="center" width=1000px>
+</p>
+</div>
+
+
+
+
+## Results
+
+### Paired Translation with pix2pix-turbo
+**Edge to Image**
+<div>
+<p align="center">
+<img src='assets/edge_to_image_results.jpg' align="center" width=800px>
+</p>
+</div>
+
+<!-- **Sketch to Image**
+TODO -->
+### Generating Diverse Outputs
+By varying the input noise map, our method can generate diverse outputs from the same input conditioning.
+The output style can be controlled by changing the text prompt.
+<div> <p align="center">
+<img src='assets/gen_variations.jpg' align="center" width=800px>
+</p> </div>
+
+### Unpaired Translation with CycleGAN-Turbo
+
+**Day to Night**
+<div> <p align="center">
+<img src='assets/day2night_results.jpg' align="center" width=800px>
+</p> </div>
+
+**Night to Day**
+<div><p align="center">
+<img src='assets/night2day_results.jpg' align="center" width=800px>
+</p> </div>
+
+**Clear to Rainy**
+<div>
+<p align="center">
+<img src='assets/clear2rainy_results.jpg' align="center" width=800px>
+</p>
+</div>
+
+**Rainy to Clear**
+<div>
+<p align="center">
+<img src='assets/rainy2clear.jpg' align="center" width=800px>
+</p>
+</div>
+<hr>
+
+
+## Method
+**Our Generator Architecture:**
+We tightly integrate three separate modules of the original latent diffusion model into a single end-to-end network with a small number of trainable weights. This architecture allows us to translate the input image x to the output y while retaining the input scene structure. We use LoRA adapters in each module, introduce skip connections and Zero-Convs between input and output, and retrain the first layer of the U-Net. Blue boxes indicate trainable layers; semi-transparent layers are frozen. The same generator can be used for various GAN objectives.
+<div>
+<p align="center">
+<img src='assets/method.jpg' align="center" width=900px>
+</p>
+</div>
+
+
+## Getting Started
+**Environment Setup**
+- We provide a [conda env file](environment.yaml) that contains all the required dependencies.
+```
+conda env create -f environment.yaml
+```
+- Following this, you can activate the conda environment with the command below.
+```
+conda activate img2img-turbo
+```
+- Alternatively, use a virtual environment:
+```
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+**Paired Image Translation (pix2pix-turbo)**
+- The following command takes an image file and a prompt as inputs, extracts the Canny edges, and saves the result in the specified directory.
+```bash
+python src/inference_paired.py --model_name "edge_to_image" \
+    --input_image "assets/examples/bird.png" \
+    --prompt "a blue bird" \
+    --output_dir "outputs"
+```
+<table>
+<th>Input Image</th>
+<th>Canny Edges</th>
+<th>Model Output</th>
+</tr>
+<tr>
+<td><img src='assets/examples/bird.png' width="200px"></td>
+<td><img src='assets/examples/bird_canny.png' width="200px"></td>
+<td><img src='assets/examples/bird_canny_blue.png' width="200px"></td>
+</tr>
+</table>
+<br>
+
+- The following command takes a sketch and a prompt as inputs, and saves the result in the specified directory.
+```bash
+python src/inference_paired.py --model_name "sketch_to_image_stochastic" \
+    --input_image "assets/examples/sketch_input.png" --gamma 0.4 \
+    --prompt "ethereal fantasy concept art of an asteroid. magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy" \
+    --output_dir "outputs"
+```
+<table>
+<th>Input</th>
+<th>Model Output</th>
+</tr>
+<tr>
+<td><img src='assets/examples/sketch_input.png' width="400px"></td>
+<td><img src='assets/examples/sketch_output.png' width="400px"></td>
+</tr>
+</table>
+<br>
+
+**Unpaired Image Translation (CycleGAN-Turbo)**
+- The following command takes a **day** image as input, and saves the translated **night** image in the specified directory.
+```
+python src/inference_unpaired.py --model_name "day_to_night" \
+    --input_image "assets/examples/day2night_input.png" --output_dir "outputs"
+```
+<table>
+<th>Input (day)</th>
+<th>Model Output (night)</th>
+</tr>
+<tr>
+<td><img src='assets/examples/day2night_input.png' width="400px"></td>
+<td><img src='assets/examples/day2night_output.png' width="400px"></td>
+</tr>
+</table>
+
+- The following command takes a **night** image as input, and saves the translated **day** image in the specified directory.
+```
+python src/inference_unpaired.py --model_name "night_to_day" \
+    --input_image "assets/examples/night2day_input.png" --output_dir "outputs"
+```
+<table>
+<th>Input (night)</th>
+<th>Model Output (day)</th>
+</tr>
+<tr>
+<td><img src='assets/examples/night2day_input.png' width="400px"></td>
+<td><img src='assets/examples/night2day_output.png' width="400px"></td>
+</tr>
+</table>
+
+- The following command takes a **clear** image as input, and saves the translated **rainy** image in the specified directory.
+```
+python src/inference_unpaired.py --model_name "clear_to_rainy" \
+    --input_image "assets/examples/clear2rainy_input.png" --output_dir "outputs"
+```
+<table>
+<th>Input (clear)</th>
+<th>Model Output (rainy)</th>
+</tr>
+<tr>
+<td><img src='assets/examples/clear2rainy_input.png' width="400px"></td>
+<td><img src='assets/examples/clear2rainy_output.png' width="400px"></td>
+</tr>
+</table>
+
+- The following command takes a **rainy** image as input, and saves the translated **clear** image in the specified directory.
+```
+python src/inference_unpaired.py --model_name "rainy_to_clear" \
+    --input_image "assets/examples/rainy2clear_input.png" --output_dir "outputs"
+```
+<table>
+<th>Input (rainy)</th>
+<th>Model Output (clear)</th>
+</tr>
+<tr>
+<td><img src='assets/examples/rainy2clear_input.png' width="400px"></td>
+<td><img src='assets/examples/rainy2clear_output.png' width="400px"></td>
+</tr>
+</table>
+
+
+
+## Gradio Demo
+- We provide a Gradio demo for the paired image translation tasks.
+- The following command launches the sketch-to-image demo locally using Gradio.
+```
+gradio gradio_sketch2image.py
+```
+- The following command launches the canny-edge-to-image demo locally using Gradio.
+```
+gradio gradio_canny2image.py
+```
+
+
+## Training with your own data
+- See the steps [here](docs/training_pix2pix_turbo.md) for training a pix2pix-turbo model on your paired data.
+- See the steps [here](docs/training_cyclegan_turbo.md) for training a CycleGAN-Turbo model on your unpaired data.
+
+
+## Acknowledgment
+Our work uses SD-Turbo as the base model, which is released under the following [LICENSE](https://huggingface.co/stabilityai/sd-turbo/blob/main/LICENSE).
assets/cat_2x.gif ADDED (image; stored with Git LFS)
assets/clear2rainy_results.jpg ADDED (image; stored with Git LFS)
assets/day2night_results.jpg ADDED (image; stored with Git LFS)
assets/edge_to_image_results.jpg ADDED (image; stored with Git LFS)
assets/examples/bird.png ADDED (image; stored with Git LFS)
assets/examples/bird_canny.png ADDED (image)
assets/examples/bird_canny_blue.png ADDED (image)
assets/examples/circles_inference_input.png ADDED (image)
assets/examples/circles_inference_output.png ADDED (image)
assets/examples/clear2rainy_input.png ADDED (image)
assets/examples/clear2rainy_output.png ADDED (image)
assets/examples/day2night_input.png ADDED (image)
assets/examples/day2night_output.png ADDED (image)
assets/examples/my_horse2zebra_input.jpg ADDED (image)
assets/examples/my_horse2zebra_output.jpg ADDED (image)
assets/examples/night2day_input.png ADDED (image)
assets/examples/night2day_output.png ADDED (image)
assets/examples/rainy2clear_input.png ADDED (image)
assets/examples/rainy2clear_output.png ADDED (image)
assets/examples/sketch_input.png ADDED (image)
assets/examples/sketch_output.png ADDED (image)
assets/examples/training_evaluation.png ADDED (image)
assets/examples/training_evaluation_unpaired.png ADDED (image)
assets/examples/training_step_0.png ADDED (image)
assets/examples/training_step_500.png ADDED (image)
assets/examples/training_step_6000.png ADDED (image)
assets/fish_2x.gif ADDED (image; stored with Git LFS)
assets/gen_variations.jpg ADDED (image; stored with Git LFS)
assets/method.jpg ADDED (image)
assets/night2day_results.jpg ADDED (image; stored with Git LFS)
assets/rainy2clear.jpg ADDED (image; stored with Git LFS)
assets/teaser_results.jpg ADDED (image; stored with Git LFS)
docs/training_cyclegan_turbo.md
ADDED
@@ -0,0 +1,98 @@
## Training with Unpaired Data (CycleGAN-turbo)
Here, we show how to train a CycleGAN-turbo model using unpaired data.
We will use the [horse2zebra dataset](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/datasets.md) introduced by [CycleGAN](https://junyanz.github.io/CycleGAN/) as an example dataset.


### Step 1. Get the Dataset
- First download the horse2zebra dataset from [here](https://www.cs.cmu.edu/~img2img-turbo/data/my_horse2zebra.zip) using the command below.
```
bash scripts/download_horse2zebra.sh
```

- Our training scripts expect the dataset to be in the following format:
```
data
├── dataset_name
│   ├── train_A
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   ├── train_B
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   ├── fixed_prompt_a.txt
│   └── fixed_prompt_b.txt
│
│   ├── test_A
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   ├── test_B
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
```
- The `fixed_prompt_a.txt` and `fixed_prompt_b.txt` files contain the **fixed caption** used for the source and target domains respectively.


### Step 2. Train the Model
- Initialize the `accelerate` environment with the following command:
```
accelerate config
```

- Run the following command to train the model.
```
export NCCL_P2P_DISABLE=1
accelerate launch --main_process_port 29501 src/train_cyclegan_turbo.py \
    --pretrained_model_name_or_path="stabilityai/sd-turbo" \
    --output_dir="output/cyclegan_turbo/my_horse2zebra" \
    --dataset_folder "data/my_horse2zebra" \
    --train_img_prep "resize_286_randomcrop_256x256_hflip" --val_img_prep "no_resize" \
    --learning_rate="1e-5" --max_train_steps=25000 \
    --train_batch_size=1 --gradient_accumulation_steps=1 \
    --report_to "wandb" --tracker_project_name "gparmar_unpaired_h2z_cycle_debug_v2" \
    --enable_xformers_memory_efficient_attention --validation_steps 250 \
    --lambda_gan 0.5 --lambda_idt 1 --lambda_cycle 1
```

- Additional optional flags:
    - `--enable_xformers_memory_efficient_attention`: Enable memory-efficient attention in the model.

### Step 3. Monitor the training progress
- You can monitor the training progress using the [Weights & Biases](https://wandb.ai/site) dashboard.

- The training script will visualize the training batch, the training losses, and the validation set L2, LPIPS, and FID scores (if specified).
<div>
    <p align="center">
    <img src='../assets/examples/training_evaluation.png' align="center" width=800px>
    </p>
</div>


- The model checkpoints will be saved in the `<output_dir>/checkpoints` directory.


### Step 4. Running Inference with the trained models

- You can run inference with the trained model using the following command:
```
python src/inference_unpaired.py --model_path "output/cyclegan_turbo/my_horse2zebra/checkpoints/model_1001.pkl" \
    --input_image "data/my_horse2zebra/test_A/n02381460_20.jpg" \
    --prompt "picture of a zebra" --direction "a2b" \
    --output_dir "outputs" --image_prep "no_resize"
```

- The above command should generate the following output:
<table>
    <tr>
    <th>Model Input</th>
    <th>Model Output</th>
    </tr>
    <tr>
    <td><img src='../assets/examples/my_horse2zebra_input.jpg' width="200px"></td>
    <td><img src='../assets/examples/my_horse2zebra_output.jpg' width="200px"></td>
    </tr>
</table>
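
Before launching the unpaired training above, it can help to sanity-check that the dataset matches the layout described in Step 1. The helper below is a hypothetical convenience script, not part of this commit; the folder and caption-file names come directly from the tree shown in the doc.

```python
# Sketch: validate the unpaired dataset layout (train_A/train_B/test_A/test_B + fixed captions).
from pathlib import Path


def check_unpaired_dataset(root: str) -> None:
    base = Path(root)
    for sub in ["train_A", "train_B", "test_A", "test_B"]:
        images = list((base / sub).glob("*"))
        assert images, f"{base / sub} is empty or missing"
        print(f"{sub}: {len(images)} files")
    for cap in ["fixed_prompt_a.txt", "fixed_prompt_b.txt"]:
        text = (base / cap).read_text().strip()
        assert text, f"{cap} should contain the fixed caption for its domain"
        print(f"{cap}: {text!r}")


check_unpaired_dataset("data/my_horse2zebra")
```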
docs/training_pix2pix_turbo.md
ADDED
@@ -0,0 +1,118 @@
## Training with Paired Data (pix2pix-turbo)
Here, we show how to train a pix2pix-turbo model using paired data.
We will use the [Fill50k dataset](https://github.com/lllyasviel/ControlNet/blob/main/docs/train.md) used by [ControlNet](https://github.com/lllyasviel/ControlNet) as an example dataset.


### Step 1. Get the Dataset
- First download a modified Fill50k dataset from [here](https://www.cs.cmu.edu/~img2img-turbo/data/my_fill50k.zip) using the command below.
```
bash scripts/download_fill50k.sh
```

- Our training scripts expect the dataset to be in the following format:
```
data
├── dataset_name
│   ├── train_A
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   ├── train_B
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   └── train_prompts.json
│
│   ├── test_A
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   ├── test_B
│   │   ├── 000000.png
│   │   ├── 000001.png
│   │   └── ...
│   └── test_prompts.json
```


### Step 2. Train the Model
- Initialize the `accelerate` environment with the following command:
```
accelerate config
```

- Run the following command to train the model.
```
accelerate launch src/train_pix2pix_turbo.py \
    --pretrained_model_name_or_path="stabilityai/sd-turbo" \
    --output_dir="output/pix2pix_turbo/fill50k" \
    --dataset_folder="data/my_fill50k" \
    --resolution=512 \
    --train_batch_size=2 \
    --enable_xformers_memory_efficient_attention --viz_freq 25 \
    --track_val_fid \
    --report_to "wandb" --tracker_project_name "pix2pix_turbo_fill50k"
```

- Additional optional flags:
    - `--track_val_fid`: Track FID score on the validation set using the [Clean-FID](https://github.com/GaParmar/clean-fid) implementation.
    - `--enable_xformers_memory_efficient_attention`: Enable memory-efficient attention in the model.
    - `--viz_freq`: Frequency of visualizing the results during training.

### Step 3. Monitor the training progress
- You can monitor the training progress using the [Weights & Biases](https://wandb.ai/site) dashboard.

- The training script will visualize the training batch, the training losses, and the validation set L2, LPIPS, and FID scores (if specified).
<div>
    <p align="center">
    <img src='../assets/examples/training_evaluation.png' align="center" width=800px>
    </p>
</div>


- The model checkpoints will be saved in the `<output_dir>/checkpoints` directory.

- Screenshots of the training progress are shown below:
    - Step 0:
    <div>
        <p align="center">
        <img src='../assets/examples/training_step_0.png' align="center" width=800px>
        </p>
    </div>

    - Step 500:
    <div>
        <p align="center">
        <img src='../assets/examples/training_step_500.png' align="center" width=800px>
        </p>
    </div>

    - Step 6000:
    <div>
        <p align="center">
        <img src='../assets/examples/training_step_6000.png' align="center" width=800px>
        </p>
    </div>


### Step 4. Running Inference with the trained models

- You can run inference with the trained model using the following command:
```
python src/inference_paired.py --model_path "output/pix2pix_turbo/fill50k/checkpoints/model_6001.pkl" \
    --input_image "data/my_fill50k/test_A/40000.png" \
    --prompt "violet circle with orange background" \
    --output_dir "outputs"
```

- The above command should generate the following output:
<table>
    <tr>
    <th>Model Input</th>
    <th>Model Output</th>
    </tr>
    <tr>
    <td><img src='../assets/examples/circles_inference_input.png' width="200px"></td>
    <td><img src='../assets/examples/circles_inference_output.png' width="200px"></td>
    </tr>
</table>
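
The `--track_val_fid` flag above relies on the Clean-FID library. As a rough standalone check of a trained model, one could also compute FID between a folder of generated outputs and the paired ground-truth folder; the sketch below assumes the standard `compute_fid` entry point of the clean-fid package and uses illustrative folder paths.

```python
# Sketch: offline FID between generated outputs and the ground-truth test_B folder.
from cleanfid import fid

score = fid.compute_fid("outputs", "data/my_fill50k/test_B")
print(f"FID: {score:.2f}")
```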
environment.yaml
ADDED
@@ -0,0 +1,34 @@
name: img2img-turbo
channels:
  - pytorch
  - defaults
dependencies:
  - python=3.10
  - pip:
    - clip @ git+https://github.com/openai/CLIP.git
    - einops>=0.6.1
    - numpy>=1.24.4
    - open-clip-torch>=2.20.0
    - opencv-python==4.6.0.66
    - pillow>=9.5.0
    - scipy==1.11.1
    - timm>=0.9.2
    - tokenizers
    - torch>=2.0.1

    - torchaudio>=2.0.2
    - torchdata==0.6.1
    - torchmetrics>=1.0.1
    - torchvision>=0.15.2

    - tqdm>=4.65.0
    - transformers==4.35.2
    - urllib3<1.27,>=1.25.4
    - xformers>=0.0.20
    - streamlit-keyup==0.2.0
    - lpips
    - clean-fid
    - peft
    - dominate
    - diffusers==0.25.1
    - gradio==3.43.1
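
After creating the environment, a quick check like the following can confirm that the pinned packages above resolved and that CUDA is visible. This is a convenience sketch, not part of the commit; the expected version strings come from the pins in this file.

```python
# Post-install sanity check for the environment defined above.
import torch, diffusers, transformers, gradio

print("torch", torch.__version__, "cuda available:", torch.cuda.is_available())
print("diffusers", diffusers.__version__)      # expected 0.25.1 per the pin above
print("transformers", transformers.__version__)  # expected 4.35.2
print("gradio", gradio.__version__)            # expected 3.43.1
```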
gradio_canny2image.py
ADDED
@@ -0,0 +1,78 @@
import numpy as np
from PIL import Image
import torch
from torchvision import transforms
import gradio as gr
from src.image_prep import canny_from_pil
from src.pix2pix_turbo import Pix2Pix_Turbo

model = Pix2Pix_Turbo("edge_to_image")


def process(input_image, prompt, low_threshold, high_threshold):
    # resize to be a multiple of 8
    new_width = input_image.width - input_image.width % 8
    new_height = input_image.height - input_image.height % 8
    input_image = input_image.resize((new_width, new_height))
    canny = canny_from_pil(input_image, low_threshold, high_threshold)
    with torch.no_grad():
        c_t = transforms.ToTensor()(canny).unsqueeze(0).cuda()
        output_image = model(c_t, prompt)
        output_pil = transforms.ToPILImage()(output_image[0].cpu() * 0.5 + 0.5)
    # flip the canny values for visualization: map all 0s to 1s and 1s to 0s
    canny_viz = 1 - (np.array(canny) / 255)
    canny_viz = Image.fromarray((canny_viz * 255).astype(np.uint8))
    return canny_viz, output_pil


if __name__ == "__main__":
    # load the model
    with gr.Blocks() as demo:
        gr.Markdown("# Pix2pix-Turbo: **Canny Edge -> Image**")
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(sources="upload", type="pil")
                prompt = gr.Textbox(label="Prompt")
                low_threshold = gr.Slider(
                    label="Canny low threshold",
                    minimum=1,
                    maximum=255,
                    value=100,
                    step=10,
                )
                high_threshold = gr.Slider(
                    label="Canny high threshold",
                    minimum=1,
                    maximum=255,
                    value=200,
                    step=10,
                )
                run_button = gr.Button(value="Run")
            with gr.Column():
                result_canny = gr.Image(type="pil")
            with gr.Column():
                result_output = gr.Image(type="pil")

        prompt.submit(
            fn=process,
            inputs=[input_image, prompt, low_threshold, high_threshold],
            outputs=[result_canny, result_output],
        )
        low_threshold.change(
            fn=process,
            inputs=[input_image, prompt, low_threshold, high_threshold],
            outputs=[result_canny, result_output],
        )
        high_threshold.change(
            fn=process,
            inputs=[input_image, prompt, low_threshold, high_threshold],
            outputs=[result_canny, result_output],
        )
        run_button.click(
            fn=process,
            inputs=[input_image, prompt, low_threshold, high_threshold],
            outputs=[result_canny, result_output],
        )

    demo.queue()
    demo.launch(debug=True, share=False)
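
Because `process()` above is a plain function that returns PIL images, it can also be reused without launching the UI, for example to batch-process a folder of inputs. The snippet below is an illustrative sketch only; it assumes the module imports as-is (which triggers the model load at import time) and uses example paths from this repo.

```python
# Sketch: reuse process() from gradio_canny2image.py headlessly.
import os
from PIL import Image

from gradio_canny2image import process  # importing loads the edge_to_image model

os.makedirs("outputs", exist_ok=True)
for name in ["bird.png"]:
    img = Image.open(f"assets/examples/{name}")
    canny_viz, output = process(img, "a blue bird", 100, 200)
    output.save(f"outputs/{os.path.splitext(name)[0]}_canny2image.png")
```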
gradio_sketch2image.py
ADDED
@@ -0,0 +1,382 @@
import random
import numpy as np
from PIL import Image
import base64
from io import BytesIO

import torch
import torchvision.transforms.functional as F
import gradio as gr

from src.pix2pix_turbo import Pix2Pix_Turbo

model = Pix2Pix_Turbo("sketch_to_image_stochastic")

style_list = [
    {
        "name": "Cinematic",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
    },
    {
        "name": "Anime",
        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
    },
    {
        "name": "Digital Art",
        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
    },
    {
        "name": "Photographic",
        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
    },
    {
        "name": "Pixel art",
        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
    },
    {
        "name": "Fantasy art",
        "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
    },
    {
        "name": "Neonpunk",
        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
    },
    {
        "name": "Manga",
        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
    },
]

styles = {k["name"]: k["prompt"] for k in style_list}
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Fantasy art"
MAX_SEED = np.iinfo(np.int32).max


def pil_image_to_data_uri(img, format="PNG"):
    buffered = BytesIO()
    img.save(buffered, format=format)
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/{format.lower()};base64,{img_str}"


def run(image, prompt, prompt_template, style_name, seed, val_r):
    print(f"prompt: {prompt}")
    print("sketch updated")
    if image is None:
        ones = Image.new("L", (512, 512), 255)
        temp_uri = pil_image_to_data_uri(ones)
        return ones, gr.update(link=temp_uri), gr.update(link=temp_uri)
    prompt = prompt_template.replace("{prompt}", prompt)
    image = image.convert("RGB")
    image_t = F.to_tensor(image) > 0.5
    print(f"r_val={val_r}, seed={seed}")
    with torch.no_grad():
        c_t = image_t.unsqueeze(0).cuda().float()
        torch.manual_seed(seed)
        B, C, H, W = c_t.shape
        noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
        output_image = model(c_t, prompt, deterministic=False, r=val_r, noise_map=noise)
    output_pil = F.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
    input_sketch_uri = pil_image_to_data_uri(Image.fromarray(255 - np.array(image)))
    output_image_uri = pil_image_to_data_uri(output_pil)
    return (
        output_pil,
        gr.update(link=input_sketch_uri),
        gr.update(link=output_image_uri),
    )


def update_canvas(use_line, use_eraser):
    if use_eraser:
        _color = "#ffffff"
        brush_size = 20
    if use_line:
        _color = "#000000"
        brush_size = 4
    return gr.update(brush_radius=brush_size, brush_color=_color, interactive=True)


def upload_sketch(file):
    _img = Image.open(file.name)
    _img = _img.convert("L")
    return gr.update(value=_img, source="upload", interactive=True)


scripts = """
async () => {
    globalThis.theSketchDownloadFunction = () => {
        console.log("test")
        var link = document.createElement("a");
        dataUri = document.getElementById('download_sketch').href
        link.setAttribute("href", dataUri)
        link.setAttribute("download", "sketch.png")
        document.body.appendChild(link); // Required for Firefox
        link.click();
        document.body.removeChild(link); // Clean up

        // also call the output download function
        theOutputDownloadFunction();
        return false
    }

    globalThis.theOutputDownloadFunction = () => {
        console.log("test output download function")
        var link = document.createElement("a");
        dataUri = document.getElementById('download_output').href
        link.setAttribute("href", dataUri);
        link.setAttribute("download", "output.png");
        document.body.appendChild(link); // Required for Firefox
        link.click();
        document.body.removeChild(link); // Clean up
        return false
    }

    globalThis.UNDO_SKETCH_FUNCTION = () => {
        console.log("undo sketch function")
        var button_undo = document.querySelector('#input_image > div.image-container.svelte-p3y7hu > div.svelte-s6ybro > button:nth-child(1)');
        // Create a new 'click' event
        var event = new MouseEvent('click', {
            'view': window,
            'bubbles': true,
            'cancelable': true
        });
        button_undo.dispatchEvent(event);
    }

    globalThis.DELETE_SKETCH_FUNCTION = () => {
        console.log("delete sketch function")
        var button_del = document.querySelector('#input_image > div.image-container.svelte-p3y7hu > div.svelte-s6ybro > button:nth-child(2)');
        // Create a new 'click' event
        var event = new MouseEvent('click', {
            'view': window,
            'bubbles': true,
            'cancelable': true
        });
        button_del.dispatchEvent(event);
    }

    globalThis.togglePencil = () => {
        el_pencil = document.getElementById('my-toggle-pencil');
        el_pencil.classList.toggle('clicked');
        // simulate a click on the gradio button
        btn_gradio = document.querySelector("#cb-line > label > input");
        var event = new MouseEvent('click', {
            'view': window,
            'bubbles': true,
            'cancelable': true
        });
        btn_gradio.dispatchEvent(event);
        if (el_pencil.classList.contains('clicked')) {
            document.getElementById('my-toggle-eraser').classList.remove('clicked');
            document.getElementById('my-div-pencil').style.backgroundColor = "gray";
            document.getElementById('my-div-eraser').style.backgroundColor = "white";
        }
        else {
            document.getElementById('my-toggle-eraser').classList.add('clicked');
            document.getElementById('my-div-pencil').style.backgroundColor = "white";
            document.getElementById('my-div-eraser').style.backgroundColor = "gray";
        }
    }

    globalThis.toggleEraser = () => {
        element = document.getElementById('my-toggle-eraser');
        element.classList.toggle('clicked');
        // simulate a click on the gradio button
        btn_gradio = document.querySelector("#cb-eraser > label > input");
        var event = new MouseEvent('click', {
            'view': window,
            'bubbles': true,
            'cancelable': true
        });
        btn_gradio.dispatchEvent(event);
        if (element.classList.contains('clicked')) {
            document.getElementById('my-toggle-pencil').classList.remove('clicked');
            document.getElementById('my-div-pencil').style.backgroundColor = "white";
            document.getElementById('my-div-eraser').style.backgroundColor = "gray";
        }
        else {
            document.getElementById('my-toggle-pencil').classList.add('clicked');
            document.getElementById('my-div-pencil').style.backgroundColor = "gray";
            document.getElementById('my-div-eraser').style.backgroundColor = "white";
        }
    }
}
"""

with gr.Blocks(css="style.css") as demo:

    gr.HTML(
        """
        <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
        <div>
            <h2><a href="https://github.com/GaParmar/img2img-turbo">One-Step Image Translation with Text-to-Image Models</a></h2>
            <div>
                <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
                    <a href='https://gauravparmar.com/'>Gaurav Parmar, </a>

                    <a href='https://taesung.me/'> Taesung Park,</a>

                    <a href='https://www.cs.cmu.edu/~srinivas/'>Srinivasa Narasimhan, </a>

                    <a href='https://www.cs.cmu.edu/~junyanz/'> Jun-Yan Zhu </a>
                </div>
            </div>
            </br>
            <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
                <a href='https://arxiv.org/abs/2403.12036'>
                    <img src="https://img.shields.io/badge/arXiv-2403.12036-red">
                </a>

                <a href='https://github.com/GaParmar/img2img-turbo'>
                    <img src='https://img.shields.io/badge/github-%23121011.svg'>
                </a>

                <a href='https://github.com/GaParmar/img2img-turbo/blob/main/LICENSE'>
                    <img src='https://img.shields.io/badge/license-MIT-lightgrey'>
                </a>
            </div>
        </div>
        </div>
        <div>
        </br>
        </div>
        """
    )

    # these are hidden buttons that are used to trigger the canvas changes
    line = gr.Checkbox(label="line", value=False, elem_id="cb-line")
    eraser = gr.Checkbox(label="eraser", value=False, elem_id="cb-eraser")
    with gr.Row(elem_id="main_row"):
        with gr.Column(elem_id="column_input"):
            gr.Markdown("## INPUT", elem_id="input_header")
            image = gr.Image(
                source="canvas",
                tool="color-sketch",
                type="pil",
                image_mode="L",
                invert_colors=True,
                shape=(512, 512),
                brush_radius=4,
                height=440,
                width=440,
                brush_color="#000000",
                interactive=True,
                show_download_button=True,
                elem_id="input_image",
                show_label=False,
            )
            download_sketch = gr.Button(
                "Download sketch", scale=1, elem_id="download_sketch"
            )

            gr.HTML(
                """
                <div class="button-row">
                    <div id="my-div-pencil" class="pad2"> <button id="my-toggle-pencil" onclick="return togglePencil(this)"></button> </div>
                    <div id="my-div-eraser" class="pad2"> <button id="my-toggle-eraser" onclick="return toggleEraser(this)"></button> </div>
                    <div class="pad2"> <button id="my-button-undo" onclick="return UNDO_SKETCH_FUNCTION(this)"></button> </div>
                    <div class="pad2"> <button id="my-button-clear" onclick="return DELETE_SKETCH_FUNCTION(this)"></button> </div>
                    <div class="pad2"> <button href="TODO" download="image" id="my-button-down" onclick='return theSketchDownloadFunction()'></button> </div>
                </div>
                """
            )
            # gr.Markdown("## Prompt", elem_id="tools_header")
            prompt = gr.Textbox(label="Prompt", value="", show_label=True)
            with gr.Row():
                style = gr.Dropdown(
                    label="Style",
                    choices=STYLE_NAMES,
                    value=DEFAULT_STYLE_NAME,
                    scale=1,
                )
                prompt_temp = gr.Textbox(
                    label="Prompt Style Template",
                    value=styles[DEFAULT_STYLE_NAME],
                    scale=2,
                    max_lines=1,
                )

            with gr.Row():
                val_r = gr.Slider(
                    label="Sketch guidance: ",
                    show_label=True,
                    minimum=0,
                    maximum=1,
                    value=0.4,
                    step=0.01,
                    scale=3,
                )
                seed = gr.Textbox(label="Seed", value=42, scale=1, min_width=50)
                randomize_seed = gr.Button("Random", scale=1, min_width=50)

        with gr.Column(elem_id="column_process", min_width=50, scale=0.4):
            gr.Markdown("## pix2pix-turbo", elem_id="description")
            run_button = gr.Button("Run", min_width=50)

        with gr.Column(elem_id="column_output"):
            gr.Markdown("## OUTPUT", elem_id="output_header")
            result = gr.Image(
                label="Result",
                height=440,
                width=440,
                elem_id="output_image",
                show_label=False,
                show_download_button=True,
            )
            download_output = gr.Button("Download output", elem_id="download_output")
            gr.Markdown("### Instructions")
            gr.Markdown("**1**. Enter a text prompt (e.g. cat)")
            gr.Markdown("**2**. Start sketching")
            gr.Markdown("**3**. Change the image style using a style template")
            gr.Markdown("**4**. Adjust the effect of sketch guidance using the slider")
            gr.Markdown("**5**. Try different seeds to generate different results")

    eraser.change(
        fn=lambda x: gr.update(value=not x),
        inputs=[eraser],
        outputs=[line],
        queue=False,
        api_name=False,
    ).then(update_canvas, [line, eraser], [image])
    line.change(
        fn=lambda x: gr.update(value=not x),
        inputs=[line],
        outputs=[eraser],
        queue=False,
        api_name=False,
    ).then(update_canvas, [line, eraser], [image])

    demo.load(None, None, None, _js=scripts)
    randomize_seed.click(
        lambda x: random.randint(0, MAX_SEED),
        inputs=[],
        outputs=seed,
        queue=False,
        api_name=False,
    )
    inputs = [image, prompt, prompt_temp, style, seed, val_r]
    outputs = [result, download_sketch, download_output]
    prompt.submit(fn=run, inputs=inputs, outputs=outputs, api_name=False)
    style.change(
        lambda x: styles[x],
        inputs=[style],
        outputs=[prompt_temp],
        queue=False,
        api_name=False,
    ).then(
        fn=run,
        inputs=inputs,
        outputs=outputs,
        api_name=False,
    )
    val_r.change(run, inputs=inputs, outputs=outputs, queue=False, api_name=False)
    run_button.click(fn=run, inputs=inputs, outputs=outputs, api_name=False)
    image.change(run, inputs=inputs, outputs=outputs, queue=False, api_name=False)

if __name__ == "__main__":
    demo.queue().launch(debug=True, share=True)
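
The sketch demo calls the model with `deterministic=False` and an explicit `noise_map`, which is what produces the diverse outputs shown in the README. Below is a minimal sketch of reproducing that outside Gradio, using only the call signature from `run()` above; the 512x512 resize and the output paths are assumptions for illustration.

```python
# Sketch: generate several variations of one sketch by varying the seed.
import os

import torch
import torchvision.transforms.functional as F
from PIL import Image

from src.pix2pix_turbo import Pix2Pix_Turbo

model = Pix2Pix_Turbo("sketch_to_image_stochastic")
sketch = Image.open("assets/examples/sketch_input.png").convert("RGB").resize((512, 512))
c_t = (F.to_tensor(sketch) > 0.5).unsqueeze(0).cuda().float()   # binarized sketch, as in run()
prompt = "ethereal fantasy concept art of an asteroid. magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy"

os.makedirs("outputs", exist_ok=True)
for seed in [13, 42, 1234]:
    torch.manual_seed(seed)
    noise = torch.randn((1, 4, c_t.shape[2] // 8, c_t.shape[3] // 8), device=c_t.device)
    with torch.no_grad():
        out = model(c_t, prompt, deterministic=False, r=0.4, noise_map=noise)
    F.to_pil_image(out[0].cpu() * 0.5 + 0.5).save(f"outputs/sketch_seed{seed}.png")
```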
python==3.9.8/Lib/site-packages/wheel/cli/tags.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+from __future__ import annotations
+
+import email.policy
+import itertools
+import os
+from collections.abc import Iterable
+from email.parser import BytesParser
+
+from ..wheelfile import WheelFile
+
+
+def _compute_tags(original_tags: Iterable[str], new_tags: str | None) -> set[str]:
+    """Add or replace tags. Supports dot-separated tags"""
+    if new_tags is None:
+        return set(original_tags)
+
+    if new_tags.startswith("+"):
+        return {*original_tags, *new_tags[1:].split(".")}
+
+    if new_tags.startswith("-"):
+        return set(original_tags) - set(new_tags[1:].split("."))
+
+    return set(new_tags.split("."))
+
+
+def tags(
+    wheel: str,
+    python_tags: str | None = None,
+    abi_tags: str | None = None,
+    platform_tags: str | None = None,
+    build_tag: str | None = None,
+    remove: bool = False,
+) -> str:
+    """Change the tags on a wheel file.
+
+    The tags are left unchanged if they are not specified. To specify "none",
+    use ["none"]. To append to the previous tags, a tag should start with a
+    "+". If a tag starts with "-", it will be removed from existing tags.
+    Processing is done left to right.
+
+    :param wheel: The paths to the wheels
+    :param python_tags: The Python tags to set
+    :param abi_tags: The ABI tags to set
+    :param platform_tags: The platform tags to set
+    :param build_tag: The build tag to set
+    :param remove: Remove the original wheel
+    """
+    with WheelFile(wheel, "r") as f:
+        assert f.filename, f"{f.filename} must be available"
+
+        wheel_info = f.read(f.dist_info_path + "/WHEEL")
+        info = BytesParser(policy=email.policy.compat32).parsebytes(wheel_info)
+
+        original_wheel_name = os.path.basename(f.filename)
+        namever = f.parsed_filename.group("namever")
+        build = f.parsed_filename.group("build")
+        original_python_tags = f.parsed_filename.group("pyver").split(".")
+        original_abi_tags = f.parsed_filename.group("abi").split(".")
+        original_plat_tags = f.parsed_filename.group("plat").split(".")
+
+        tags: list[str] = info.get_all("Tag", [])
+        existing_build_tag = info.get("Build")
+
+    impls = {tag.split("-")[0] for tag in tags}
+    abivers = {tag.split("-")[1] for tag in tags}
+    platforms = {tag.split("-")[2] for tag in tags}
+
+    if impls != set(original_python_tags):
+        msg = f"Wheel internal tags {impls!r} != filename tags {original_python_tags!r}"
+        raise AssertionError(msg)
+
+    if abivers != set(original_abi_tags):
+        msg = f"Wheel internal tags {abivers!r} != filename tags {original_abi_tags!r}"
+        raise AssertionError(msg)
+
+    if platforms != set(original_plat_tags):
+        msg = (
+            f"Wheel internal tags {platforms!r} != filename tags {original_plat_tags!r}"
+        )
+        raise AssertionError(msg)
+
+    if existing_build_tag != build:
+        msg = (
+            f"Incorrect filename '{build}' "
+            f"& *.dist-info/WHEEL '{existing_build_tag}' build numbers"
+        )
+        raise AssertionError(msg)
+
+    # Start changing as needed
+    if build_tag is not None:
+        build = build_tag
+
+    final_python_tags = sorted(_compute_tags(original_python_tags, python_tags))
+    final_abi_tags = sorted(_compute_tags(original_abi_tags, abi_tags))
+    final_plat_tags = sorted(_compute_tags(original_plat_tags, platform_tags))
+
+    final_tags = [
+        namever,
+        ".".join(final_python_tags),
+        ".".join(final_abi_tags),
+        ".".join(final_plat_tags),
+    ]
+    if build:
+        final_tags.insert(1, build)
+
+    final_wheel_name = "-".join(final_tags) + ".whl"
+
+    if original_wheel_name != final_wheel_name:
+        del info["Tag"], info["Build"]
+        for a, b, c in itertools.product(
+            final_python_tags, final_abi_tags, final_plat_tags
+        ):
+            info["Tag"] = f"{a}-{b}-{c}"
+        if build:
+            info["Build"] = build
+
+        original_wheel_path = os.path.join(
+            os.path.dirname(f.filename), original_wheel_name
+        )
+        final_wheel_path = os.path.join(os.path.dirname(f.filename), final_wheel_name)
+
+        with WheelFile(original_wheel_path, "r") as fin, WheelFile(
+            final_wheel_path, "w"
+        ) as fout:
+            fout.comment = fin.comment  # preserve the comment
+            for item in fin.infolist():
+                if item.is_dir():
+                    continue
+                if item.filename == f.dist_info_path + "/RECORD":
+                    continue
+                if item.filename == f.dist_info_path + "/WHEEL":
+                    fout.writestr(item, info.as_bytes())
+                else:
+                    fout.writestr(item, fin.read(item))
+
+        if remove:
+            os.remove(original_wheel_path)
+
+    return final_wheel_name
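This module belongs to the bundled wheel package inside the accidentally uploaded conda environment, not to the img2img-turbo sources. As a quick illustration of the "+" / "-" / replace semantics described in its docstring, here is a standalone re-implementation of the _compute_tags rule (an example sketch only, not the wheel package's public API):

from __future__ import annotations
from collections.abc import Iterable

def compute_tags(original_tags: Iterable[str], new_tags: str | None) -> set[str]:
    # same rule as _compute_tags above: "+x" appends, "-x" removes, plain "x" replaces
    if new_tags is None:
        return set(original_tags)
    if new_tags.startswith("+"):
        return {*original_tags, *new_tags[1:].split(".")}
    if new_tags.startswith("-"):
        return set(original_tags) - set(new_tags[1:].split("."))
    return set(new_tags.split("."))

print(compute_tags(["cp39"], "+cp310.cp311"))   # {'cp39', 'cp310', 'cp311'}
print(compute_tags(["cp39", "cp310"], "-cp39")) # {'cp310'}
print(compute_tags(["cp39"], "py3"))            # {'py3'}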
python==3.9.8/conda-meta/history
ADDED
@@ -0,0 +1,19 @@
+==> 2024-08-08 09:16:40 <==
+# cmd: C:\ProgramData\miniconda3\Scripts\conda-script.py env create -f environment.yaml --p python==3.9.8
+# conda version: 24.1.2
++defaults/noarch::tzdata-2024a-h04d1e81_0
++defaults/win-64::bzip2-1.0.8-h2bbff1b_6
++defaults/win-64::ca-certificates-2024.7.2-haa95532_0
++defaults/win-64::libffi-3.4.4-hd77b12b_1
++defaults/win-64::openssl-3.0.14-h827c3e9_0
++defaults/win-64::pip-24.0-py310haa95532_0
++defaults/win-64::python-3.10.14-he1021f5_1
++defaults/win-64::setuptools-72.1.0-py310haa95532_0
++defaults/win-64::sqlite-3.45.3-h2bbff1b_0
++defaults/win-64::tk-8.6.14-h0416ee5_0
++defaults/win-64::vc-14.2-h2eaa2aa_4
++defaults/win-64::vs2015_runtime-14.29.30133-h43f2093_4
++defaults/win-64::wheel-0.43.0-py310haa95532_0
++defaults/win-64::xz-5.4.6-h8cc25b3_1
++defaults/win-64::zlib-1.2.13-h8cc25b3_1
+# update specs: ['pip', 'python=3.10']
requirements.txt
ADDED
@@ -0,0 +1,28 @@
+clip @ git+https://github.com/openai/CLIP.git
+einops>=0.6.1
+numpy>=1.24.4
+open-clip-torch>=2.20.0
+opencv-python==4.6.0.66
+pillow>=9.5.0
+scipy==1.11.1
+timm>=0.9.2
+tokenizers
+torch>=2.0.1
+
+torchaudio>=2.0.2
+torchdata==0.6.1
+torchmetrics>=1.0.1
+torchvision>=0.15.2
+
+tqdm>=4.65.0
+transformers==4.35.2
+triton==2.0.0
+urllib3<1.27,>=1.25.4
+xformers>=0.0.20
+streamlit-keyup==0.2.0
+lpips
+clean-fid
+peft
+dominate
+diffusers==0.25.1
+gradio==3.43.1
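A minimal sketch of installing these pinned dependencies into the current environment (an assumed workflow, not taken from the repo's own docs):

import subprocess
import sys

# install the pinned dependencies listed in requirements.txt
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])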
scripts/download_fill50k.sh
ADDED
@@ -0,0 +1,5 @@
+mkdir -p data
+wget https://www.cs.cmu.edu/~img2img-turbo/data/my_fill50k.zip -O data/my_fill50k.zip
+cd data
+unzip my_fill50k.zip
+rm my_fill50k.zip
scripts/download_horse2zebra.sh
ADDED
@@ -0,0 +1,5 @@
+mkdir -p data
+wget https://www.cs.cmu.edu/~img2img-turbo/data/my_horse2zebra.zip -O data/my_horse2zebra.zip
+cd data
+unzip my_horse2zebra.zip
+rm my_horse2zebra.zip
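Both download scripts follow the same pattern: fetch a zip into data/ and unpack it in place. A hedged sketch of invoking them from Python, assuming bash, wget, and unzip are available on the PATH:

import subprocess

# run the dataset download scripts shipped with the repo
for script in ("scripts/download_fill50k.sh", "scripts/download_horse2zebra.sh"):
    subprocess.check_call(["bash", script])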
src/cyclegan_turbo.py
ADDED
@@ -0,0 +1,254 @@
+import os
+import sys
+import copy
+import torch
+import torch.nn as nn
+from transformers import AutoTokenizer, CLIPTextModel
+from diffusers import AutoencoderKL, UNet2DConditionModel
+from peft import LoraConfig
+from peft.utils import get_peft_model_state_dict
+p = "src/"
+sys.path.append(p)
+from model import make_1step_sched, my_vae_encoder_fwd, my_vae_decoder_fwd, download_url
+
+
+class VAE_encode(nn.Module):
+    def __init__(self, vae, vae_b2a=None):
+        super(VAE_encode, self).__init__()
+        self.vae = vae
+        self.vae_b2a = vae_b2a
+
+    def forward(self, x, direction):
+        assert direction in ["a2b", "b2a"]
+        if direction == "a2b":
+            _vae = self.vae
+        else:
+            _vae = self.vae_b2a
+        return _vae.encode(x).latent_dist.sample() * _vae.config.scaling_factor
+
+
+class VAE_decode(nn.Module):
+    def __init__(self, vae, vae_b2a=None):
+        super(VAE_decode, self).__init__()
+        self.vae = vae
+        self.vae_b2a = vae_b2a
+
+    def forward(self, x, direction):
+        assert direction in ["a2b", "b2a"]
+        if direction == "a2b":
+            _vae = self.vae
+        else:
+            _vae = self.vae_b2a
+        assert _vae.encoder.current_down_blocks is not None
+        _vae.decoder.incoming_skip_acts = _vae.encoder.current_down_blocks
+        x_decoded = (_vae.decode(x / _vae.config.scaling_factor).sample).clamp(-1, 1)
+        return x_decoded
+
+
+def initialize_unet(rank, return_lora_module_names=False):
+    unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
+    unet.requires_grad_(False)
+    unet.train()
+    l_target_modules_encoder, l_target_modules_decoder, l_modules_others = [], [], []
+    l_grep = ["to_k", "to_q", "to_v", "to_out.0", "conv", "conv1", "conv2", "conv_in", "conv_shortcut", "conv_out", "proj_out", "proj_in", "ff.net.2", "ff.net.0.proj"]
+    for n, p in unet.named_parameters():
+        if "bias" in n or "norm" in n: continue
+        for pattern in l_grep:
+            if pattern in n and ("down_blocks" in n or "conv_in" in n):
+                l_target_modules_encoder.append(n.replace(".weight",""))
+                break
+            elif pattern in n and "up_blocks" in n:
+                l_target_modules_decoder.append(n.replace(".weight",""))
+                break
+            elif pattern in n:
+                l_modules_others.append(n.replace(".weight",""))
+                break
+    lora_conf_encoder = LoraConfig(r=rank, init_lora_weights="gaussian", target_modules=l_target_modules_encoder, lora_alpha=rank)
+    lora_conf_decoder = LoraConfig(r=rank, init_lora_weights="gaussian", target_modules=l_target_modules_decoder, lora_alpha=rank)
+    lora_conf_others = LoraConfig(r=rank, init_lora_weights="gaussian", target_modules=l_modules_others, lora_alpha=rank)
+    unet.add_adapter(lora_conf_encoder, adapter_name="default_encoder")
+    unet.add_adapter(lora_conf_decoder, adapter_name="default_decoder")
+    unet.add_adapter(lora_conf_others, adapter_name="default_others")
+    unet.set_adapters(["default_encoder", "default_decoder", "default_others"])
+    if return_lora_module_names:
+        return unet, l_target_modules_encoder, l_target_modules_decoder, l_modules_others
+    else:
+        return unet
+
+
+def initialize_vae(rank=4, return_lora_module_names=False):
+    vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
+    vae.requires_grad_(False)
+    vae.encoder.forward = my_vae_encoder_fwd.__get__(vae.encoder, vae.encoder.__class__)
+    vae.decoder.forward = my_vae_decoder_fwd.__get__(vae.decoder, vae.decoder.__class__)
+    vae.requires_grad_(True)
+    vae.train()
+    # add the skip connection convs
+    vae.decoder.skip_conv_1 = torch.nn.Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda().requires_grad_(True)
+    vae.decoder.skip_conv_2 = torch.nn.Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda().requires_grad_(True)
+    vae.decoder.skip_conv_3 = torch.nn.Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda().requires_grad_(True)
+    vae.decoder.skip_conv_4 = torch.nn.Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda().requires_grad_(True)
+    torch.nn.init.constant_(vae.decoder.skip_conv_1.weight, 1e-5)
+    torch.nn.init.constant_(vae.decoder.skip_conv_2.weight, 1e-5)
+    torch.nn.init.constant_(vae.decoder.skip_conv_3.weight, 1e-5)
+    torch.nn.init.constant_(vae.decoder.skip_conv_4.weight, 1e-5)
+    vae.decoder.ignore_skip = False
+    vae.decoder.gamma = 1
+    l_vae_target_modules = ["conv1","conv2","conv_in", "conv_shortcut",
+        "conv", "conv_out", "skip_conv_1", "skip_conv_2", "skip_conv_3",
+        "skip_conv_4", "to_k", "to_q", "to_v", "to_out.0",
+    ]
+    vae_lora_config = LoraConfig(r=rank, init_lora_weights="gaussian", target_modules=l_vae_target_modules)
+    vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
+    if return_lora_module_names:
+        return vae, l_vae_target_modules
+    else:
+        return vae
+
+
+class CycleGAN_Turbo(torch.nn.Module):
+    def __init__(self, pretrained_name=None, pretrained_path=None, ckpt_folder="checkpoints", lora_rank_unet=8, lora_rank_vae=4):
+        super().__init__()
+        self.tokenizer = AutoTokenizer.from_pretrained("stabilityai/sd-turbo", subfolder="tokenizer")
+        self.text_encoder = CLIPTextModel.from_pretrained("stabilityai/sd-turbo", subfolder="text_encoder").cuda()
+        self.sched = make_1step_sched()
+        vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
+        unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
+        vae.encoder.forward = my_vae_encoder_fwd.__get__(vae.encoder, vae.encoder.__class__)
+        vae.decoder.forward = my_vae_decoder_fwd.__get__(vae.decoder, vae.decoder.__class__)
+        # add the skip connection convs
+        vae.decoder.skip_conv_1 = torch.nn.Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+        vae.decoder.skip_conv_2 = torch.nn.Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+        vae.decoder.skip_conv_3 = torch.nn.Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+        vae.decoder.skip_conv_4 = torch.nn.Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False).cuda()
+        vae.decoder.ignore_skip = False
+        self.unet, self.vae = unet, vae
+        if pretrained_name == "day_to_night":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/day2night.pkl"
+            self.load_ckpt_from_url(url, ckpt_folder)
+            self.timesteps = torch.tensor([999], device="cuda").long()
+            self.caption = "driving in the night"
+            self.direction = "a2b"
+        elif pretrained_name == "night_to_day":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/night2day.pkl"
+            self.load_ckpt_from_url(url, ckpt_folder)
+            self.timesteps = torch.tensor([999], device="cuda").long()
+            self.caption = "driving in the day"
+            self.direction = "b2a"
+        elif pretrained_name == "clear_to_rainy":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/clear2rainy.pkl"
+            self.load_ckpt_from_url(url, ckpt_folder)
+            self.timesteps = torch.tensor([999], device="cuda").long()
+            self.caption = "driving in heavy rain"
+            self.direction = "a2b"
+        elif pretrained_name == "rainy_to_clear":
+            url = "https://www.cs.cmu.edu/~img2img-turbo/models/rainy2clear.pkl"
+            self.load_ckpt_from_url(url, ckpt_folder)
+            self.timesteps = torch.tensor([999], device="cuda").long()
+            self.caption = "driving in the day"
+            self.direction = "b2a"
+
+        elif pretrained_path is not None:
+            sd = torch.load(pretrained_path)
+            self.load_ckpt_from_state_dict(sd)
+            self.timesteps = torch.tensor([999], device="cuda").long()
+            self.caption = None
+            self.direction = None
+
+        self.vae_enc.cuda()
+        self.vae_dec.cuda()
+        self.unet.cuda()
+
+    def load_ckpt_from_state_dict(self, sd):
+        lora_conf_encoder = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["l_target_modules_encoder"], lora_alpha=sd["rank_unet"])
+        lora_conf_decoder = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["l_target_modules_decoder"], lora_alpha=sd["rank_unet"])
+        lora_conf_others = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["l_modules_others"], lora_alpha=sd["rank_unet"])
+        self.unet.add_adapter(lora_conf_encoder, adapter_name="default_encoder")
+        self.unet.add_adapter(lora_conf_decoder, adapter_name="default_decoder")
+        self.unet.add_adapter(lora_conf_others, adapter_name="default_others")
+        for n, p in self.unet.named_parameters():
+            name_sd = n.replace(".default_encoder.weight", ".weight")
+            if "lora" in n and "default_encoder" in n:
+                p.data.copy_(sd["sd_encoder"][name_sd])
+        for n, p in self.unet.named_parameters():
+            name_sd = n.replace(".default_decoder.weight", ".weight")
+            if "lora" in n and "default_decoder" in n:
+                p.data.copy_(sd["sd_decoder"][name_sd])
+        for n, p in self.unet.named_parameters():
+            name_sd = n.replace(".default_others.weight", ".weight")
+            if "lora" in n and "default_others" in n:
+                p.data.copy_(sd["sd_other"][name_sd])
+        self.unet.set_adapter(["default_encoder", "default_decoder", "default_others"])
+
+        vae_lora_config = LoraConfig(r=sd["rank_vae"], init_lora_weights="gaussian", target_modules=sd["vae_lora_target_modules"])
+        self.vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
+        self.vae.decoder.gamma = 1
+        self.vae_b2a = copy.deepcopy(self.vae)
+        self.vae_enc = VAE_encode(self.vae, vae_b2a=self.vae_b2a)
+        self.vae_enc.load_state_dict(sd["sd_vae_enc"])
+        self.vae_dec = VAE_decode(self.vae, vae_b2a=self.vae_b2a)
+        self.vae_dec.load_state_dict(sd["sd_vae_dec"])
+
+    def load_ckpt_from_url(self, url, ckpt_folder):
+        os.makedirs(ckpt_folder, exist_ok=True)
+        outf = os.path.join(ckpt_folder, os.path.basename(url))
+        download_url(url, outf)
+        sd = torch.load(outf)
+        self.load_ckpt_from_state_dict(sd)
+
+    @staticmethod
+    def forward_with_networks(x, direction, vae_enc, unet, vae_dec, sched, timesteps, text_emb):
+        B = x.shape[0]
+        assert direction in ["a2b", "b2a"]
+        x_enc = vae_enc(x, direction=direction).to(x.dtype)
+        model_pred = unet(x_enc, timesteps, encoder_hidden_states=text_emb,).sample
+        x_out = torch.stack([sched.step(model_pred[i], timesteps[i], x_enc[i], return_dict=True).prev_sample for i in range(B)])
+        x_out_decoded = vae_dec(x_out, direction=direction)
+        return x_out_decoded
+
+    @staticmethod
+    def get_traininable_params(unet, vae_a2b, vae_b2a):
+        # add all unet parameters
+        params_gen = list(unet.conv_in.parameters())
+        unet.conv_in.requires_grad_(True)
+        unet.set_adapters(["default_encoder", "default_decoder", "default_others"])
+        for n, p in unet.named_parameters():
+            if "lora" in n and "default" in n:
+                assert p.requires_grad
+                params_gen.append(p)
+
+        # add all vae_a2b parameters
+        for n, p in vae_a2b.named_parameters():
+            if "lora" in n and "vae_skip" in n:
+                assert p.requires_grad
+                params_gen.append(p)
+        params_gen = params_gen + list(vae_a2b.decoder.skip_conv_1.parameters())
+        params_gen = params_gen + list(vae_a2b.decoder.skip_conv_2.parameters())
+        params_gen = params_gen + list(vae_a2b.decoder.skip_conv_3.parameters())
+        params_gen = params_gen + list(vae_a2b.decoder.skip_conv_4.parameters())
+
+        # add all vae_b2a parameters
+        for n, p in vae_b2a.named_parameters():
+            if "lora" in n and "vae_skip" in n:
+                assert p.requires_grad
+                params_gen.append(p)
+        params_gen = params_gen + list(vae_b2a.decoder.skip_conv_1.parameters())
+        params_gen = params_gen + list(vae_b2a.decoder.skip_conv_2.parameters())
+        params_gen = params_gen + list(vae_b2a.decoder.skip_conv_3.parameters())
+        params_gen = params_gen + list(vae_b2a.decoder.skip_conv_4.parameters())
+        return params_gen
+
+    def forward(self, x_t, direction=None, caption=None, caption_emb=None):
+        if direction is None:
+            assert self.direction is not None
+            direction = self.direction
+        if caption is None and caption_emb is None:
+            assert self.caption is not None
+            caption = self.caption
+        if caption_emb is not None:
+            caption_enc = caption_emb
+        else:
+            caption_tokens = self.tokenizer(caption, max_length=self.tokenizer.model_max_length,
+                padding="max_length", truncation=True, return_tensors="pt").input_ids.to(x_t.device)
+            caption_enc = self.text_encoder(caption_tokens)[0].detach().clone()
+        return self.forward_with_networks(x_t, direction, self.vae_enc, self.unet, self.vae_dec, self.sched, self.timesteps, caption_enc)
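A minimal usage sketch of CycleGAN_Turbo with one of its pretrained checkpoints; it mirrors what src/inference_unpaired.py below does, assumes a CUDA GPU, and uses a hypothetical local image path:

import torch
from PIL import Image
from torchvision import transforms
from cyclegan_turbo import CycleGAN_Turbo

# load a pretrained day->night translation (the checkpoint is downloaded on first use)
model = CycleGAN_Turbo(pretrained_name="day_to_night")
model.eval()

# "input.png" is a placeholder path; preprocessing matches inference_unpaired.py
img = Image.open("input.png").convert("RGB").resize((512, 512), Image.LANCZOS)
x = transforms.ToTensor()(img)
x = transforms.Normalize([0.5], [0.5])(x).unsqueeze(0).cuda()

with torch.no_grad():
    # uses the caption and direction stored for the pretrained checkpoint
    out = model(x)

transforms.ToPILImage()(out[0].cpu() * 0.5 + 0.5).save("output.png")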
src/image_prep.py
ADDED
@@ -0,0 +1,12 @@
+import numpy as np
+from PIL import Image
+import cv2
+
+
+def canny_from_pil(image, low_threshold=100, high_threshold=200):
+    image = np.array(image)
+    image = cv2.Canny(image, low_threshold, high_threshold)
+    image = image[:, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    control_image = Image.fromarray(image)
+    return control_image
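A short example of canny_from_pil using the sample image shipped in assets/ (assumes it is run from the src/ directory so the import resolves):

from PIL import Image
from image_prep import canny_from_pil

# extract a 3-channel Canny edge map from the bundled bird example
img = Image.open("../assets/examples/bird.png").convert("RGB")
edges = canny_from_pil(img, low_threshold=100, high_threshold=200)
edges.save("bird_canny.png")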
src/inference_paired.py
ADDED
@@ -0,0 +1,65 @@
+import os
+import argparse
+import numpy as np
+from PIL import Image
+import torch
+from torchvision import transforms
+import torchvision.transforms.functional as F
+from pix2pix_turbo import Pix2Pix_Turbo
+from image_prep import canny_from_pil
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input_image', type=str, required=True, help='path to the input image')
+    parser.add_argument('--prompt', type=str, required=True, help='the prompt to be used')
+    parser.add_argument('--model_name', type=str, default='', help='name of the pretrained model to be used')
+    parser.add_argument('--model_path', type=str, default='', help='path to a model state dict to be used')
+    parser.add_argument('--output_dir', type=str, default='output', help='the directory to save the output')
+    parser.add_argument('--low_threshold', type=int, default=100, help='Canny low threshold')
+    parser.add_argument('--high_threshold', type=int, default=200, help='Canny high threshold')
+    parser.add_argument('--gamma', type=float, default=0.4, help='The sketch interpolation guidance amount')
+    parser.add_argument('--seed', type=int, default=42, help='Random seed to be used')
+    args = parser.parse_args()
+
+    # exactly one of model_name and model_path should be provided
+    if (args.model_name == '') == (args.model_path == ''):
+        raise ValueError('Exactly one of model_name or model_path should be provided')
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    # initialize the model
+    model = Pix2Pix_Turbo(pretrained_name=args.model_name, pretrained_path=args.model_path)
+    model.set_eval()
+
+    # make sure that the input image is a multiple of 8
+    input_image = Image.open(args.input_image).convert('RGB')
+    new_width = input_image.width - input_image.width % 8
+    new_height = input_image.height - input_image.height % 8
+    input_image = input_image.resize((new_width, new_height), Image.LANCZOS)
+    bname = os.path.basename(args.input_image)
+
+    # translate the image
+    with torch.no_grad():
+        if args.model_name == 'edge_to_image':
+            canny = canny_from_pil(input_image, args.low_threshold, args.high_threshold)
+            canny_viz_inv = Image.fromarray(255 - np.array(canny))
+            canny_viz_inv.save(os.path.join(args.output_dir, bname.replace('.png', '_canny.png')))
+            c_t = F.to_tensor(canny).unsqueeze(0).cuda()
+            output_image = model(c_t, args.prompt)
+
+        elif args.model_name == 'sketch_to_image_stochastic':
+            image_t = F.to_tensor(input_image) < 0.5
+            c_t = image_t.unsqueeze(0).cuda().float()
+            torch.manual_seed(args.seed)
+            B, C, H, W = c_t.shape
+            noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
+            output_image = model(c_t, args.prompt, deterministic=False, r=args.gamma, noise_map=noise)
+
+        else:
+            c_t = F.to_tensor(input_image).unsqueeze(0).cuda()
+            output_image = model(c_t, args.prompt)
+
+    output_pil = transforms.ToPILImage()(output_image[0].cpu() * 0.5 + 0.5)
+
+    # save the output image
+    output_pil.save(os.path.join(args.output_dir, bname))
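A hypothetical invocation of the paired (edge-to-image) inference script on the bundled example image; the prompt text is only an illustration:

import subprocess
import sys

# run edge-to-image inference from the repository root
subprocess.check_call([
    sys.executable, "src/inference_paired.py",
    "--model_name", "edge_to_image",
    "--input_image", "assets/examples/bird.png",
    "--prompt", "a colorful bird sitting on a branch",
    "--output_dir", "outputs",
])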
src/inference_unpaired.py
ADDED
@@ -0,0 +1,53 @@
+import os
+import argparse
+from PIL import Image
+import torch
+from torchvision import transforms
+from cyclegan_turbo import CycleGAN_Turbo
+from my_utils.training_utils import build_transform
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input_image', type=str, required=True, help='path to the input image')
+    parser.add_argument('--prompt', type=str, required=False, help='the prompt to be used. It is required when loading a custom model_path.')
+    parser.add_argument('--model_name', type=str, default=None, help='name of the pretrained model to be used')
+    parser.add_argument('--model_path', type=str, default=None, help='path to a local model state dict to be used')
+    parser.add_argument('--output_dir', type=str, default='output', help='the directory to save the output')
+    parser.add_argument('--image_prep', type=str, default='resize_512x512', help='the image preparation method')
+    parser.add_argument('--direction', type=str, default=None, help='the direction of translation. None for pretrained models, a2b or b2a for custom paths.')
+    args = parser.parse_args()
+
+    # exactly one of model_name and model_path should be provided
+    if (args.model_name is None) == (args.model_path is None):
+        raise ValueError('Exactly one of model_name or model_path should be provided')
+
+    if args.model_path is not None and args.prompt is None:
+        raise ValueError('prompt is required when loading a custom model_path.')
+
+    if args.model_name is not None:
+        assert args.prompt is None, 'prompt should not be provided when loading a pretrained model.'
+        assert args.direction is None, 'direction should not be provided when loading a pretrained model.'
+
+    # initialize the model
+    model = CycleGAN_Turbo(pretrained_name=args.model_name, pretrained_path=args.model_path)
+    model.eval()
+    model.unet.enable_xformers_memory_efficient_attention()
+
+    T_val = build_transform(args.image_prep)
+
+    input_image = Image.open(args.input_image).convert('RGB')
+    # translate the image
+    with torch.no_grad():
+        input_img = T_val(input_image)
+        x_t = transforms.ToTensor()(input_img)
+        x_t = transforms.Normalize([0.5], [0.5])(x_t).unsqueeze(0).cuda()
+        output = model(x_t, direction=args.direction, caption=args.prompt)
+
+    output_pil = transforms.ToPILImage()(output[0].cpu() * 0.5 + 0.5)
+    output_pil = output_pil.resize((input_image.width, input_image.height), Image.LANCZOS)
+
+    # save the output image
+    bname = os.path.basename(args.input_image)
+    os.makedirs(args.output_dir, exist_ok=True)
+    output_pil.save(os.path.join(args.output_dir, bname))
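A hypothetical invocation of the unpaired inference script with one of the pretrained translations defined in CycleGAN_Turbo above (no --prompt or --direction is passed, as the script asserts for pretrained models):

import subprocess
import sys

# run day-to-night translation on the bundled example image from the repository root
subprocess.check_call([
    sys.executable, "src/inference_unpaired.py",
    "--model_name", "day_to_night",
    "--input_image", "assets/examples/day2night_input.png",
    "--output_dir", "outputs",
])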