IllusionDiffusion

Running on Zero

File size: 14,376 Bytes

4b87716
 
18274c1
453ed2e
 
49ad6a5
453ed2e
1a833ba
6914f7a
453ed2e
a29e3ba
00f6a78
9ad92f4
453ed2e
9ad92f4
4984c7e
 
be85eb8
 
453ed2e
e56af76
b31f6c0
96e351a
c000f9c
96e351a
be85eb8
ecc6c05
e266395
453ed2e
4b87716
 
 
 
00f6a78
 
a29e3ba
4984c7e
be85eb8
766763f
be85eb8
 
802f3de
 
 
 
 
 
 
 
be85eb8
ecc6c05
766763f
9ad92f4
00f6a78
453ed2e
00f6a78
ee36d88
ecc6c05
4984c7e
453ed2e
7391723
4b87716
 
 
 
be85eb8
e266395
 
 
 
 
 
 
 
 
 
ecc6c05
4984c7e
 
 
00f6a78
7391723
 
720f6c3
4984c7e
00f6a78
 
9ad92f4
453ed2e
a29e3ba
453ed2e
 
 
 
 
1ad3027
9ad92f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad3027
4984c7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad3027
4984c7e
 
 
 
 
 
 
1ad3027
86d5e88
 
 
b31f6c0
 
49ad6a5
1ad3027
49ad6a5
e56af76
49ad6a5
 
1ad3027
49ad6a5
e56af76
 
 
49ad6a5
4b87716
 
 
a29e3ba
4b87716
18274c1
453ed2e
811e3ea
453ed2e
 
01e1199
 
4984c7e
 
 
453ed2e
 
c000f9c
 
453ed2e
b31f6c0
 
 
 
453ed2e
a29e3ba
9ad92f4
a29e3ba
 
4984c7e
7391723
 
a29e3ba
1a833ba
7391723
1a833ba
a29e3ba
453ed2e
 
4984c7e
9ad92f4
 
453ed2e
4984c7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b31f6c0
 
 
 
c000f9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49ad6a5
4b87716
 
 
 
 
c000f9c
1ad3027
 
453ed2e
 
e07df8b
d7f12e2
283c403
28bf710
7eb9cc8
e07df8b
453ed2e
 
b770306
1ad3027
49ad6a5
 
1ad3027
 
453ed2e
 
d7f12e2
01e84fc
3784ad6
d7f12e2
01e84fc
 
 
9ad92f4
01e84fc
 
 
4f2286a
 
01e84fc
453ed2e
d7f12e2
96e351a
 
 
01e84fc
dc7aed1
1ad3027
fc70300
86d5e88
 
b31f6c0
ad4d288
1a833ba
a86f74c
563dfc0
1ad3027
 
453ed2e
86d5e88
 
b31f6c0
ad4d288
453ed2e
a86f74c
563dfc0
1ad3027
 
18274c1
c000f9c
1ad3027
c000f9c
 
 
 
 
 
0013e5a
 
 
453ed2e
 
0013e5a

# Imports and initial setup

import spaces
import torch
import gradio as gr
from gradio import processing_utils, utils
from PIL import Image
import random

from diffusers import (
    DiffusionPipeline,
    AutoencoderKL,
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    StableDiffusionLatentUpscalePipeline,
    StableDiffusionImg2ImgPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    DPMSolverMultistepScheduler,
    EulerDiscreteScheduler
)
import tempfile
import time
from share_btn import community_icon_html, loading_icon_html, share_js
import user_history
from illusion_style import css
import os
from transformers import CLIPImageProcessor
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker



# Model initialization

BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"

# Initialize both pipelines
# The VAE and the ControlNet are loaded on their own (in float16) and handed
# to the pipeline constructor below.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrcode_monster", torch_dtype=torch.float16)

# Initialize the safety checker conditionally
# NOTE: this environment-derived value is overwritten by the unconditional
# `SAFETY_CHECKER_ENABLED = False` assignment below, so the SAFETY_CHECKER
# environment variable currently has no effect.
SAFETY_CHECKER_ENABLED = os.environ.get("SAFETY_CHECKER", "0") == "1"
# safety_checker = None
# feature_extractor = None
# if SAFETY_CHECKER_ENABLED:
#     safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to("cuda")
#     feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

# The safety checker is switched off unconditionally: neither a checker nor a
# feature extractor is passed to the pipeline.
SAFETY_CHECKER_ENABLED = False  # forcibly disabled
safety_checker = None
feature_extractor = None

# ControlNet pipeline built on BASE_MODEL with the VAE and ControlNet loaded
# above, then moved to the "cuda" device.
main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    BASE_MODEL,
    controlnet=controlnet,
    vae=vae,
    safety_checker=safety_checker,
    feature_extractor=feature_extractor,
    torch_dtype=torch.float16,
).to("cuda")



# Function definitions

# Function to check NSFW images
# (kept commented out; the safety checker is disabled in this file)
#def check_nsfw_images(images: list[Image.Image]) -> tuple[list[Image.Image], list[bool]]:
#    if SAFETY_CHECKER_ENABLED:
#        safety_checker_input = feature_extractor(images, return_tensors="pt").to("cuda")
#        has_nsfw_concepts = safety_checker(
#            images=[images],
#            clip_input=safety_checker_input.pixel_values.to("cuda")
#        )
#        return images, has_nsfw_concepts
#    else:
#        return images, [False] * len(images)

#main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
#main_pipe.unet.to(memory_format=torch.channels_last)
#main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
#model_id = "stabilityai/sd-x2-latent-upscaler"
# Image-to-image ControlNet pipeline constructed from main_pipe's components.
# Presumably this reuses the modules main_pipe already loaded instead of
# loading them a second time -- confirm against the diffusers documentation.
image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)


#image_pipe.unet = torch.compile(image_pipe.unet, mode="reduce-overhead", fullgraph=True)
#upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
#upscaler.to("cuda")


# Sampler map: sampler name shown in the UI -> factory that takes an existing
# scheduler config and returns the scheduler instance to use.
SAMPLER_MAP = {
    # The diffusers option is spelled `use_karras_sigmas`. The previous
    # `use_karras=True` keyword is not a scheduler config attribute, so the
    # Karras sigma schedule promised by this entry's name was never enabled.
    "DPM++ Karras SDE": lambda config: DPMSolverMultistepScheduler.from_config(
        config,
        use_karras_sigmas=True,
        algorithm_type="sde-dpmsolver++",
    ),
    "Euler": lambda config: EulerDiscreteScheduler.from_config(config),
}

# Crop the input image around its center, then resize it to the given size.
def center_crop_resize(img, output_size=(512, 512)):
    """Crop ``img`` to its largest centered square and resize the result.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` plus
            ``crop(box)`` and ``resize(size)`` methods.
        output_size: Size handed to ``resize``; defaults to ``(512, 512)``.

    Returns:
        Whatever ``resize`` returns for the cropped image.
    """
    w, h = img.size
    side = min(w, h)  # edge length of the square crop

    # Crop box is (left, top, right, bottom), centered on the image. The
    # coordinates come from true division, so they are floats.
    box = (
        (w - side) / 2,
        (h - side) / 2,
        (w + side) / 2,
        (h + side) / 2,
    )
    return img.crop(box).resize(output_size)

# Resize an image tensor to the given size with the given interpolation method.
def common_upscale(samples, width, height, upscale_method, crop=False):
    """Interpolate ``samples`` to ``(height, width)``, optionally center-cropping first.

    Args:
        samples: Tensor indexed as ``[:, :, y, x]``; dimension 2 is read as
            the height and dimension 3 as the width.
        width: Target width.
        height: Target height.
        upscale_method: Passed as ``mode`` to ``torch.nn.functional.interpolate``.
        crop: Only the exact value ``"center"`` enables cropping to the target
            aspect ratio; any other value (including the default ``False``)
            interpolates the tensor as-is.

    Returns:
        The interpolated tensor.
    """
    region = samples
    if crop == "center":
        src_h, src_w = samples.shape[2], samples.shape[3]
        src_ratio = src_w / src_h
        dst_ratio = width / height

        # Trim only the axis that is too long for the target aspect ratio,
        # taking the same amount off both ends.
        trim_x = trim_y = 0
        if src_ratio > dst_ratio:
            trim_x = round((src_w - src_w * (dst_ratio / src_ratio)) / 2)
        elif src_ratio < dst_ratio:
            trim_y = round((src_h - src_h * (src_ratio / dst_ratio)) / 2)
        region = samples[:, :, trim_y:src_h - trim_y, trim_x:src_w - trim_x]

    return torch.nn.functional.interpolate(region, size=(height, width), mode=upscale_method)

# Scale the images by a factor, delegating the resize to common_upscale.
def upscale(samples, upscale_method, scale_by):
    """Resize ``samples["images"]`` by the factor ``scale_by``.

    Args:
        samples: Object supporting ``samples["images"]``; that value's
            dimension 2 is read as the height and dimension 3 as the width.
        upscale_method: Interpolation mode forwarded to ``common_upscale``.
        scale_by: Multiplier applied to both width and height (the products
            are rounded to integers).

    Returns:
        The resized tensor returned by ``common_upscale`` (called with
        cropping disabled).
    """
    images = samples["images"]
    target_w = round(images.shape[3] * scale_by)
    target_h = round(images.shape[2] * scale_by)
    return common_upscale(images, target_w, target_h, upscale_method, "disabled")

# Check that the user supplied the inputs the generation needs.
def check_inputs(prompt: str, control_image: Image.Image):
    """Validate the user inputs before inference runs.

    Args:
        prompt: Text prompt; must be neither ``None`` nor the empty string.
        control_image: Control image; must not be ``None``.

    Raises:
        gr.Error: If the control image is missing, or (checked second) if the
            prompt is missing.
    """
    image_missing = control_image is None
    if image_missing:
        raise gr.Error("Please select or upload an Input Illusion")

    prompt_missing = prompt in (None, "")
    if prompt_missing:
        raise gr.Error("Prompt is required")

# Open an image reference as a PIL (Python Imaging Library) image.
def convert_to_pil(base64_image):
    """Return the PIL image obtained by opening ``base64_image``.

    Despite the parameter name, no decoding happens here: the argument is
    handed directly to ``Image.open``.
    """
    return Image.open(base64_image)

# Save a PIL image to a temporary PNG file and return that file's path.
def convert_to_base64(pil_image):
    """Write ``pil_image`` to a new temporary ``.png`` file and return its path.

    Despite the function name, the return value is a filesystem path, not a
    Base64 string. The file is created with ``delete=False``, so it is left
    on disk when this function returns.

    Args:
        pil_image: Image object exposing ``save(fp, format=...)``.

    Returns:
        Path of the temporary PNG file as a string.
    """
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        # The original body called `image.save(...)`, but no `image` name
        # exists in this scope, so every call raised NameError. Writing
        # through the open handle also avoids reopening the file by name
        # while it is still held open here.
        pil_image.save(temp_file, format="PNG")
    return temp_file.name



# Inference function

@spaces.GPU
def inference(
    control_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    guidance_scale: float = 8.0,
    controlnet_conditioning_scale: float = 1,
    control_guidance_start: float = 1,
    control_guidance_end: float = 1,
    upscaler_strength: float = 0.5,
    seed: int = -1,
    sampler = "DPM++ Karras SDE",
    progress = gr.Progress(track_tqdm=True),
    profile: gr.OAuthProfile | None = None,
):
    """Generate an illusion image in two passes and record it in the user history.

    Pass 1 runs ``main_pipe`` for 15 steps on a 512x512 center crop of the
    control image and asks for latents. The latents are upscaled 2x with
    ``"nearest-exact"`` interpolation. Pass 2 runs ``image_pipe`` for 20 steps
    on the upscaled latents, conditioned on a 1024x1024 center crop of the
    control image.

    Args:
        control_image: Pattern image used as the ControlNet condition.
        prompt: Text prompt.
        negative_prompt: Negative text prompt.
        guidance_scale: Guidance scale passed to both passes.
        controlnet_conditioning_scale: ControlNet conditioning scale passed
            to both passes.
        control_guidance_start: ControlNet guidance start, both passes.
        control_guidance_end: ControlNet guidance end, both passes.
            NOTE(review): both defaults are 1, while the UI in this file
            passes 0 and 1 -- confirm the pipelines accept equal start/end
            before relying on the defaults.
        upscaler_strength: ``strength`` of the second (img2img) pass.
        seed: Seed for the random generator; ``-1`` draws a random seed.
        sampler: Key into ``SAMPLER_MAP`` selecting the scheduler.
        progress: Gradio progress object; not referenced in the body.
        profile: OAuth profile forwarded to ``user_history.save_image``.

    Returns:
        Tuple ``(image, gr.update(visible=True), gr.update(visible=True),
        used_seed)`` where ``used_seed`` is the seed actually applied.
    """
    start_time = time.time()
    start_time_formatted = time.strftime("%H:%M:%S", time.localtime(start_time))
    print(f"Inference started at {start_time_formatted}")

    control_image_small = center_crop_resize(control_image)
    control_image_large = center_crop_resize(control_image, (1024, 1024))

    # NOTE(review): only main_pipe's scheduler is replaced here; image_pipe's
    # scheduler attribute is not assigned in this function -- confirm whether
    # the second pass is meant to follow the selected sampler as well.
    main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)

    # Resolve the effective seed once. The int() cast guards against a UI
    # component delivering the number as a float, which manual_seed rejects.
    my_seed = random.randint(0, 2**32 - 1) if seed == -1 else int(seed)
    generator = torch.Generator(device="cuda").manual_seed(my_seed)

    # Pass 1: low-resolution generation, returned as latents.
    out = main_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=control_image_small,
        guidance_scale=float(guidance_scale),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        generator=generator,
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        num_inference_steps=15,
        output_type="latent"
    )
    upscaled_latents = upscale(out, "nearest-exact", 2)

    # Pass 2: img2img refinement of the upscaled latents.
    out_image = image_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        control_image=control_image_large,
        image=upscaled_latents,
        guidance_scale=float(guidance_scale),
        generator=generator,
        num_inference_steps=20,
        strength=upscaler_strength,
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale)
    )
    result_image = out_image["images"][0]

    end_time = time.time()
    end_time_formatted = time.strftime("%H:%M:%S", time.localtime(end_time))
    print(f"Inference ended at {end_time_formatted}, taking {end_time-start_time}s")

    # Save image + metadata
    user_history.save_image(
        label=prompt,
        image=result_image,
        profile=profile,
        metadata={
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "guidance_scale": guidance_scale,
            "controlnet_conditioning_scale": controlnet_conditioning_scale,
            "control_guidance_start": control_guidance_start,
            "control_guidance_end": control_guidance_end,
            "upscaler_strength": upscaler_strength,
            # Record the seed that was actually used. Storing the raw `seed`
            # argument saved -1 for random runs, which made them impossible
            # to reproduce from the history metadata.
            "seed": my_seed,
            "sampler": sampler,
        },
    )

    return result_image, gr.update(visible=True), gr.update(visible=True), my_seed



# Build the Gradio UI

with gr.Blocks() as app:

    # Introduction and description of the app: header text and links.
    # NOTE(review): the blurb below says the app is back "with the safety
    # checker", while this file sets SAFETY_CHECKER_ENABLED = False and passes
    # safety_checker=None to the pipeline -- confirm which is intended.
    gr.Markdown(
        '''
        <div style="text-align: center;">
            <h1>Illusion Diffusion HQ 🌀</h1>
            <p style="font-size:16px;">Stable Diffusion で、驚くほど高品質なイリュージョン・アート作品を生成</p>
            <p>プロンプトとパターンが与えられれば QR コードで調整されたコントロール・ネットを使用して、驚くほど美しいイリュージョンを作成します。</p>
            <p><small>このプロジェクトは、<a href="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster">Monster Labs QR コントロール・ネット</a> を使用して機能します。Illusion Diffusion が安全性チェッカーとともに復活しました!<a href="https://twitter.com/angrypenguinPNG">作者</a> や大きな貢献をしてくれた <a href="https://twitter.com/multimodalart">multimodalart</a> を Twitter でフォローしてください。ワークフローを発見してくれた <a href="https://twitter.com/MrUgleh">MrUgleh</a> に感謝します :)　作者をサポートしたい場合は、<a href="https://deforum.studio">deforum.studio</a> の使用を検討してください。</small></p>
        </div>
        '''
    )

    # State holders (not wired to any of the event handlers defined below).
    state_img_input = gr.State()
    state_img_output = gr.State()

    # Lay out the app: the first column holds the inputs and options, the
    # second column holds the output image and the share controls.
    with gr.Row():
        with gr.Column():
            control_image = gr.Image(label="イリュージョンのインプット（画像のレイアウトやストラクチャがわかるモノクロ画像）", type="pil", elem_id="control_image")
            controlnet_conditioning_scale = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.8, label="イリュージョンの強さ", elem_id="illusion_strength", info="ControlNet 条件付けスケール")
            gr.Examples(examples=["_checkers.png", "_checkers_mid.jpg", "_pattern.png", "_ultra_checkers.png", "_spiral.jpeg", "_funky.jpeg" ], inputs=control_image)
            prompt = gr.Textbox(label="プロンプト（スタイル情報や登場させたいモチーフ）", elem_id="prompt", info="生成したいものを入力してください", placeholder="賑やかな通りと遠くに城がある中世の村の風景")
            negative_prompt = gr.Textbox(label="ネガティブ・プロンプト", info="生成したくないものを入力してください", value="low quality", elem_id="negative_prompt")
            with gr.Accordion(label="高度なオプション", open=False):
                guidance_scale = gr.Slider(minimum=0.0, maximum=50.0, step=0.25, value=7.5, label="ガイダンス・スケール")
                sampler = gr.Dropdown(choices=list(SAMPLER_MAP.keys()), value="Euler")
                control_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0, label="ControlNetの開始")
                control_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1, label="ControlNetの終了")
                strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1, label="アップスケーラーの強度")
                seed = gr.Slider(minimum=-1, maximum=9999999999, step=1, value=-1, label="シード", info="-1 はランダム・シードです")
                used_seed = gr.Number(label="最後に使用したシード",interactive=False)
            run_btn = gr.Button("実行")
        with gr.Column():
            result_image = gr.Image(label="イリュージョン・ディフュージョンのアウトプット", interactive=False, elem_id="output")
            # The share group starts hidden (visible=False).
            with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
                community_icon = gr.HTML(community_icon_html)
                loading_icon = gr.HTML(loading_icon_html)
                share_button = gr.Button("コミュニティにシェア", elem_id="share-btn")

    # When the prompt textbox is submitted: validate the inputs first, then
    # run inference only if validation succeeded.
    # `result_image` appears twice in `outputs` because `inference` returns
    # four values: the image, two gr.update(visible=True) objects, and the seed.
    prompt.submit(
        check_inputs,
        inputs=[prompt, control_image],
        queue=False
    ).success(
        inference,
        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
        outputs=[result_image, result_image, share_group, used_seed])

    # When the run button is clicked: same validate-then-infer chain as above.
    run_btn.click(
        check_inputs,
        inputs=[prompt, control_image],
        queue=False
    ).success(
        inference,
        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
        outputs=[result_image, result_image, share_group, used_seed])

    # When the share button is clicked: no Python callback, only the
    # `share_js` snippet is attached.
    share_button.click(None, [], [], js=share_js)

# Top-level application: the demo UI and the user-history view, each rendered
# in its own tab, styled with the imported `css`.
with gr.Blocks(css=css) as app_with_history:
    with gr.Tab("Demo"):
        app.render()
    with gr.Tab("Past generations"):
        user_history.render()

# Previous launch configuration, kept commented out for reference:
# app_with_history.queue(max_size=20,api_open=True )
# if __name__ == "__main__":
#     app_with_history.launch(max_threads=400)

# Script entry point: enable the queue with default settings, then launch.
if __name__ == "__main__":
    app_with_history.queue()
    app_with_history.launch()