Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -79,15 +79,25 @@ def measure_time(func):
 # GPU memory cleanup function
 def clear_gpu_memory():
     """Aggressive GPU memory cleanup"""
-
-
-
+    # Under Zero GPU, avoid initializing CUDA in the main process
+    if hasattr(spaces, 'GPU'):
+        # Under Zero GPU, GPU work runs only inside @spaces.GPU
         gc.collect()
-
-
-
-
-
+        return
+
+    if torch.cuda.is_available():
+        try:
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            gc.collect()
+
+            # Log GPU memory state
+            allocated = torch.cuda.memory_allocated() / 1024**3
+            reserved = torch.cuda.memory_reserved() / 1024**3
+            logger.info(f"GPU Memory - Allocated: {allocated:.2f}GB, Reserved: {reserved:.2f}GB")
+        except Exception as e:
+            logger.warning(f"GPU memory clear failed: {e}")
+            gc.collect()
 
 # Model manager (singleton pattern)
 class ModelManager:
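The early return above relies on a ZeroGPU convention: when a Space runs on Zero hardware, the spaces package exposes a GPU attribute, and CUDA may only be touched inside a function decorated with @spaces.GPU. A minimal sketch of that pattern (the function name and duration value are illustrative, not taken from app.py):

    import spaces  # provided on Hugging Face Spaces
    import torch

    @spaces.GPU(duration=120)  # a GPU is attached only while this call runs
    def run_on_gpu(prompt: str):
        # Inside the decorated call, torch.cuda.is_available() is True on
        # ZeroGPU; the main process outside stays CPU-only.
        return torch.cuda.get_device_name(0)

Calling such a decorated function from a Gradio event handler is what triggers the GPU attach; module-level code like clear_gpu_memory() must therefore avoid CUDA calls, which is why the Zero GPU branch only runs gc.collect() and returns.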
@@ -125,7 +135,8 @@ class ModelManager:
         clear_gpu_memory()
 
         # Load model components (memory-efficient) - autocast fix
-        if torch.cuda.is_available():
+        if torch.cuda.is_available() and not hasattr(spaces, 'GPU'):
+            # Regular GPU environment
             with torch.amp.autocast('cuda', enabled=False):  # fixed section
                 image_encoder = CLIPVisionModel.from_pretrained(
                     config.model_id,
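A compact way to express this branch without duplicating the loading calls is to pick the context manager once; a sketch under the same hasattr(spaces, 'GPU') convention, where load_components() is a hypothetical stand-in for the from_pretrained calls:

    import contextlib
    import torch

    # Disable autocast explicitly only on a regular CUDA box; on CPU or in
    # the Zero GPU main process, use a no-op context instead.
    if torch.cuda.is_available() and not hasattr(spaces, 'GPU'):
        load_ctx = torch.amp.autocast('cuda', enabled=False)
    else:
        load_ctx = contextlib.nullcontext()

    with load_ctx:
        components = load_components()  # hypothetical loader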
@@ -141,18 +152,18 @@ class ModelManager:
                     low_cpu_mem_usage=True
                 )
         else:
-            # CPU environment
+            # CPU or Zero GPU environment
             image_encoder = CLIPVisionModel.from_pretrained(
                 config.model_id,
                 subfolder="image_encoder",
-                torch_dtype=torch.float32,
+                torch_dtype=torch.float16 if hasattr(spaces, 'GPU') else torch.float32,
                 low_cpu_mem_usage=True
             )
 
         vae = AutoencoderKLWan.from_pretrained(
             config.model_id,
             subfolder="vae",
-            torch_dtype=torch.float32,
+            torch_dtype=torch.float16 if hasattr(spaces, 'GPU') else torch.float32,
             low_cpu_mem_usage=True
         )
 
@@ -160,7 +171,7 @@ class ModelManager:
             config.model_id,
             vae=vae,
             image_encoder=image_encoder,
-            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+            torch_dtype=torch.bfloat16 if (torch.cuda.is_available() or hasattr(spaces, 'GPU')) else torch.float32,
             low_cpu_mem_usage=True,
             use_safetensors=True
         )
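The torch_dtype changes in the last two hunks follow one rule, summarized in this hypothetical helper (app.py inlines the expressions rather than defining it): in the CPU/Zero-GPU loading branch, components use fp16 under Zero GPU and fp32 otherwise, while the assembled pipeline requests bf16 whenever any GPU, regular CUDA or Zero GPU, is expected.

    import torch

    def component_dtype(zero_gpu: bool) -> torch.dtype:
        # image_encoder / vae in the non-CUDA branch: fp16 under Zero GPU,
        # fp32 on plain CPU
        return torch.float16 if zero_gpu else torch.float32

    def pipeline_dtype(zero_gpu: bool, cuda: bool) -> torch.dtype:
        # full pipeline: bf16 for regular CUDA or Zero GPU, fp32 on CPU
        return torch.bfloat16 if (cuda or zero_gpu) else torch.float32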
@@ -184,12 +195,16 @@ class ModelManager:
 
         # GPU optimization settings
         if hasattr(spaces, 'GPU'):  # Zero GPU environment
-
-            logger.info("
+            # Handled automatically in the Zero GPU environment
+            logger.info("Model loaded for Zero GPU environment")
         elif config.enable_model_cpu_offload and torch.cuda.is_available():
             self._pipe.enable_model_cpu_offload()
+            logger.info("CPU offload enabled")
         elif torch.cuda.is_available():
             self._pipe.to("cuda")
+            logger.info("Model moved to CUDA")
+        else:
+            logger.info("Running on CPU")
 
         if config.enable_vae_slicing:
             self._pipe.enable_vae_slicing()
@@ -297,8 +312,8 @@ class VideoGenerator:
         if height > 832 or width > 832:  # max length per side
             return False, "📐 In Zero GPU environment, maximum dimension is 832 pixels"
 
-        # GPU memory check
-        if torch.cuda.is_available():
+        # GPU memory check (skip under Zero GPU)
+        if torch.cuda.is_available() and not hasattr(spaces, 'GPU'):
             try:
                 free_memory = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated()
                 required_memory = (height * width * 3 * 8 * duration * self.config.fixed_fps) / (1024**3)
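For scale, the required_memory estimate treats each pixel as 3 channels of 8 bytes per frame, with duration * fps frames in total. A worked example at the 832-pixel cap (the duration and fps values are assumed for illustration, since self.config.fixed_fps is not shown in this diff):

    # ~1.24 GB for a 5-second clip at 832x832 and 16 fps
    height, width, duration, fps = 832, 832, 5, 16
    required_gb = (height * width * 3 * 8 * duration * fps) / (1024 ** 3)
    print(f"{required_gb:.2f} GB")  # -> 1.24 GB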
@@ -378,6 +393,10 @@ def generate_video(input_image, prompt, height, width,
         raise gr.Error("⏳ Another video is being generated. Please wait...")
 
     try:
+        # Under Zero GPU, the GPU is now available inside this call
+        if hasattr(spaces, 'GPU') and torch.cuda.is_available():
+            logger.info("GPU initialized in Zero GPU environment")
+
         progress(0.1, desc="🔍 Validating inputs...")
 
         # Additional validation for the Zero GPU environment
@@ -417,7 +436,9 @@ def generate_video(input_image, prompt, height, width,
         progress(0.4, desc="🎬 Generating video frames...")
 
         # Memory-efficient generation
-        if torch.cuda.is_available()
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if device == "cuda":
             with torch.inference_mode(), torch.amp.autocast('cuda', enabled=True):  # fixed section
                 try:
                     output_frames_list = pipe(
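Only the CUDA branch of the new if/else is visible in this hunk; a CPU fallback consistent with it would skip autocast, roughly like this sketch (the pipe keyword arguments are elided here just as the hunk truncates them):

    if device == "cuda":
        with torch.inference_mode(), torch.amp.autocast('cuda', enabled=True):
            output_frames_list = pipe(...)  # kwargs as in app.py
    else:
        # CPU path: inference_mode still applies, CUDA autocast does not
        with torch.inference_mode():
            output_frames_list = pipe(...)  # kwargs as in app.py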
@@ -857,8 +878,11 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         )
 
 if __name__ == "__main__":
-    #
-
+    # Log the Zero GPU environment check
+    if hasattr(spaces, 'GPU'):
+        logger.info("Running in Zero GPU environment")
+    else:
+        logger.info("Running in standard environment")
 
     # Run the app
     demo.queue(concurrency_count=1)  # limit concurrent runs
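One compatibility note on the last line: concurrency_count is the Gradio 3.x spelling of this setting. If the Space is ever moved to Gradio 4+, where queue() no longer accepts that argument, the equivalent call should be:

    demo.queue(default_concurrency_limit=1)  # Gradio 4.x replacement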