LPX committed on
Commit a2ebda3 · 1 Parent(s): 10d29a5

Add T5EncoderModel and FluxControlNetPipeline initialization in app_v4.py, set max memory usage for ZeroGPU

Files changed (1)
  app_v4.py +22 -22
app_v4.py CHANGED
@@ -6,6 +6,8 @@ import os
 import datetime
 import io
 import moondream as md
+from transformers import T5EncoderModel
+from diffusers import FluxControlNetPipeline
 from diffusers.utils import load_image
 from PIL import Image
 from threading import Thread
@@ -19,34 +21,32 @@ from model_loader import safe_model_load
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MAX_SEED = 1000000

-model_cache = {"models": None}
+huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 md_api_key = os.getenv("MD_KEY")
 model = md.vl(api_key=md_api_key)

-@spaces.GPU() # This function gets priority for GPU access
-def load_warm_models():
-    """Special function to keep models warm in ZeroGPU"""
-    if model_cache["models"] is None:
-        model_cache["models"] = safe_model_load()
-        print(f"Model cache: {model_cache['models']}")
-    return model_cache["models"]
-
-# This wrapper keeps the models loaded and accessible
-def get_model():
-    """Get models from cache"""
-    if model_cache["models"] is None:
-        model_cache["models"] = load_warm_models()
-        print(f"Model cache: {model_cache['models']}")
-
-    return model_cache["models"]
-
-# import subprocess
-# # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
-@spaces.GPU()
-def init_space():
-    global pipe
-    pipe = safe_model_load()
-    return pipe
+try:
+    # Set max memory usage for ZeroGPU
+    torch.cuda.set_per_process_memory_fraction(1.0)
+    torch.set_float32_matmul_precision("high")
+except Exception as e:
+    print(f"Error setting memory usage: {e}")
+
+text_encoder_2_unquant = T5EncoderModel.from_pretrained(
+    "LPX55/FLUX.1-merged_uncensored",
+    subfolder="text_encoder_2",
+    torch_dtype=torch.bfloat16,
+    token=huggingface_token
+)
+
+pipe = FluxControlNetPipeline.from_pretrained(
+    "LPX55/FLUX.1M-8step_upscaler-cnet",
+    torch_dtype=torch.bfloat16,
+    text_encoder_2=text_encoder_2_unquant,
+    token=huggingface_token
+)
+pipe.to("cuda")
+

 @spaces.GPU(duration=12)
 @torch.no_grad()
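
For reference, a minimal sketch of how the pipeline initialized in this commit might be invoked for upscaling; the input path, 4x scale factor, and guidance value below are assumptions, not part of the commit. Note also that the committed code reads the token from the env var spelled "HUGGINFACE_TOKEN", so the Space must define that exact name.

# Usage sketch (assumptions: input path, 4x factor, guidance value)
from diffusers.utils import load_image

control_image = load_image("low_res.jpg")  # hypothetical low-res input
w, h = control_image.size

upscaled = pipe(
    prompt="",                      # upscaler ControlNets often run with an empty prompt
    control_image=control_image,    # low-res image conditions the ControlNet
    width=w * 4,                    # 4x upscale is an assumption
    height=h * 4,
    num_inference_steps=8,          # matches the "8step" in the model name
    guidance_scale=3.5,             # assumed value
).images[0]
upscaled.save("upscaled.png")

The set_per_process_memory_fraction(1.0) call lets the process use the full GPU allocation ZeroGPU grants; wrapping it in try/except keeps the app importable on CPU-only builds, where torch.cuda calls raise.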