app.py (CHANGED)
```diff
@@ -13,25 +13,23 @@ import tempfile
 # Set environment variable to reduce memory fragmentation
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 
-# … [comment truncated in the diff view]
-torch_dtype = torch.float16 if device == "cuda" else torch.float32
+# Initialize pipeline as None - will be loaded in GPU function
+pipe = None
 
-pipe … [module-level pipeline construction, old lines 20-33, truncated in the diff view]
-# Use CPU offloading to reduce VRAM usage on GPU
+def load_pipeline():
+    """Load the pipeline on GPU when needed"""
+    global pipe
+    if pipe is None:
+        print("Loading pipeline...")
+        pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+            "stabilityai/stable-diffusion-xl-refiner-1.0",
+            torch_dtype=torch.float16,
+            variant="fp16",
+            use_safetensors=True,
+            device_map="auto"
+        )
+
+        # Enable memory optimizations
         pipe.enable_model_cpu_offload()
 
         # Try to enable memory efficient attention
```
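Condensed, the pattern this hunk introduces is a lazily initialized module-level singleton: nothing touches the model (or CUDA) at import time, and the first caller pays the load cost once. A minimal sketch of that pattern, assuming only the imports visible in the hunk (not a verbatim excerpt of app.py):

```python
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline

pipe = None  # module-level cache; stays empty until first use

def load_pipeline():
    """Load the SDXL refiner once, then reuse the cached instance."""
    global pipe
    if pipe is None:
        pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-refiner-1.0",
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )
        pipe.enable_model_cpu_offload()  # keep submodules on CPU until needed
    return pipe
```

One caveat worth checking against the installed diffusers version: the diff also passes `device_map="auto"`, which pipeline-level `from_pretrained` may not accept (pipeline loading has historically supported only the "balanced" strategy) and which can conflict with `enable_model_cpu_offload()`; the sketch therefore omits it.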
```diff
@@ -40,13 +38,9 @@ try:
         except (ModuleNotFoundError, ImportError):
             print("xformers not available, using attention slicing")
             pipe.enable_attention_slicing()
-else:
-    # For CPU inference, enable attention slicing
-    pipe.enable_attention_slicing()
 
-… [old lines 47-48 truncated in the diff view]
-pipe = None
+        print("Pipeline loaded successfully!")
+    return pipe
 
 
 @spaces.GPU
```
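The surviving fallback follows the usual diffusers recipe: prefer xformers memory-efficient attention, and fall back to attention slicing when the package is missing. A minimal sketch, reusing the `load_pipeline()` from the previous hunk:

```python
pipe = load_pipeline()  # cached pipeline from the sketch above

try:
    # Requires the optional xformers package; raises ModuleNotFoundError otherwise.
    pipe.enable_xformers_memory_efficient_attention()
except (ModuleNotFoundError, ImportError):
    print("xformers not available, using attention slicing")
    pipe.enable_attention_slicing()  # lower peak memory at some speed cost
```

The removed `else:` branch covered the old CPU-inference path; it disappears because generation now always runs in the GPU context.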
```diff
@@ -60,8 +54,11 @@ def img2img(
     num_inference_steps: int = 50,
     seed: int = -1,
 ):
-… [old lines 63-64 truncated in the diff view]
+    # Load pipeline inside GPU context
+    try:
+        pipe = load_pipeline()
+    except Exception as e:
+        return None, f"❌ Failed to load model: {str(e)}", None
 
     try:
         # Choose image source
```
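Loading inside the decorated function is the load-bearing part of this change on ZeroGPU hardware: a CUDA device is only attached while a `@spaces.GPU` call is running, so model setup that touches CUDA at import time fails before the app ever serves a request. A sketch of that constraint (assumed ZeroGPU behavior, not code from this repo):

```python
import spaces
import torch

@spaces.GPU
def on_gpu():
    # Inside a @spaces.GPU call a CUDA device is attached, so this succeeds.
    return torch.zeros(1, device="cuda")

# The same torch.zeros(1, device="cuda") at module import time would fail on
# ZeroGPU, which is why app.py now defers pipeline creation to load_pipeline().
```

Wrapping `load_pipeline()` in its own try/except also means a failed download or out-of-memory load surfaces in the UI as a readable status string rather than a stack trace.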
```diff
@@ -86,9 +83,9 @@ def img2img(
 
         # Set seed and generator
         if seed == -1:
-            generator = torch.Generator(device=…
+            generator = torch.Generator(device="cuda")
         else:
-            generator = torch.Generator(device=…
+            generator = torch.Generator(device="cuda").manual_seed(seed)
 
         # Validate inputs
         if not prompt.strip():
```
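Both branches now pin the generator to CUDA; `seed == -1` keeps nondeterministic noise, while any other value makes runs reproducible. The same logic as a small standalone helper (hypothetical, not part of the diff):

```python
import torch

def make_generator(seed: int = -1) -> torch.Generator:
    """Mirror the seed handling above: -1 means random, anything else is fixed."""
    generator = torch.Generator(device="cuda")
    if seed != -1:
        generator.manual_seed(seed)  # same seed + settings -> same image
    return generator
```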
```diff
@@ -120,7 +117,7 @@ def img2img(
             "steps": num_inference_steps,
             "width": result.width,
             "height": result.height,
-            "device": …
+            "device": "cuda"
         }
 
         # Save metadata into PNG
```
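The `# Save metadata into PNG` step itself is outside the hunk; with Pillow this is typically done through `PngInfo` text chunks, as in the following hedged sketch (the chunk key and file name are illustrative, not taken from app.py):

```python
import json
from PIL import Image
from PIL.PngImagePlugin import PngInfo

result = Image.new("RGB", (1024, 1024))  # stand-in for the generated image

info = PngInfo()
info.add_text("generation_info", json.dumps({
    "steps": 50,
    "width": result.width,
    "height": result.height,
    "device": "cuda",
}))
result.save("output.png", pnginfo=info)  # metadata travels inside the PNG file
```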