LPX committed on
Commit a2ebda3 · 1 Parent(s): 10d29a5

Add T5EncoderModel and FluxControlNetPipeline initialization in app_v4.py, set max memory usage for ZeroGPU

Files changed (1)
  app_v4.py +22 -22
app_v4.py CHANGED
@@ -6,6 +6,8 @@ import os
 import datetime
 import io
 import moondream as md
+from transformers import T5EncoderModel
+from diffusers import FluxControlNetPipeline
 from diffusers.utils import load_image
 from PIL import Image
 from threading import Thread
@@ -19,34 +21,32 @@ from model_loader import safe_model_load
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 MAX_SEED = 1000000

-model_cache = {"models": None}
+huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 md_api_key = os.getenv("MD_KEY")
 model = md.vl(api_key=md_api_key)

-@spaces.GPU() # This function gets priority for GPU access
-def load_warm_models():
-    """Special function to keep models warm in ZeroGPU"""
-    if model_cache["models"] is None:
-        model_cache["models"] = safe_model_load()
-        print(f"Model cache: {model_cache['models']}")
-    return model_cache["models"]
-
-# This wrapper keeps the models loaded and accessible
-def get_model():
-    """Get models from cache"""
-    if model_cache["models"] is None:
-        model_cache["models"] = load_warm_models()
-        print(f"Model cache: {model_cache['models']}")
-
-    return model_cache["models"]
-
-# import subprocess
-# # subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
-@spaces.GPU()
-def init_space():
-    global pipe
-    pipe = safe_model_load()
-    return pipe
+try:
+    # Set max memory usage for ZeroGPU
+    torch.cuda.set_per_process_memory_fraction(1.0)
+    torch.set_float32_matmul_precision("high")
+except Exception as e:
+    print(f"Error setting memory usage: {e}")
+
+text_encoder_2_unquant = T5EncoderModel.from_pretrained(
+    "LPX55/FLUX.1-merged_uncensored",
+    subfolder="text_encoder_2",
+    torch_dtype=torch.bfloat16,
+    token=huggingface_token
+)
+
+pipe = FluxControlNetPipeline.from_pretrained(
+    "LPX55/FLUX.1M-8step_upscaler-cnet",
+    torch_dtype=torch.bfloat16,
+    text_encoder_2=text_encoder_2_unquant,
+    token=huggingface_token
+)
+pipe.to("cuda")
+

 @spaces.GPU(duration=12)
 @torch.no_grad()
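
For reference, a minimal sketch of how the pipeline initialized in this commit might be invoked for upscaling; the input path, 4x scale factor, and guidance value below are assumptions, not part of the commit. Note also that the committed code reads the token from the env var spelled "HUGGINFACE_TOKEN", so the Space must define that exact name.

# Usage sketch (assumptions: input path, 4x factor, guidance value)
from diffusers.utils import load_image

control_image = load_image("low_res.jpg")  # hypothetical low-res input
w, h = control_image.size

upscaled = pipe(
    prompt="",                      # upscaler ControlNets often run with an empty prompt
    control_image=control_image,    # low-res image conditions the ControlNet
    width=w * 4,                    # 4x upscale is an assumption
    height=h * 4,
    num_inference_steps=8,          # matches the "8step" in the model name
    guidance_scale=3.5,             # assumed value
).images[0]
upscaled.save("upscaled.png")

The set_per_process_memory_fraction(1.0) call lets the process use the full GPU allocation ZeroGPU grants; wrapping it in try/except keeps the app importable on CPU-only builds, where torch.cuda calls raise.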