Wan-2.1-T2V-1.3B-GPU

Paused

App Files Files Community

markury commited on Mar 19

Commit

887231d

1 Parent(s): 6cc263f

debug: adapter loading

Browse files

Files changed (2) hide show

app.py +64 -34
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -3,12 +3,13 @@ import subprocess
 import importlib.util
 # Check if required packages are installed
-required_packages = ["ftfy", "einops", "imageio", "imageio-ffmpeg"]
 for package in required_packages:
     if importlib.util.find_spec(package) is None:
         print(f"Installing missing dependency: {package}")
         subprocess.check_call([sys.executable, "-m", "pip", "install", package])
 import torch
 import gradio as gr
 import spaces
@@ -19,9 +20,11 @@ try:
     from diffusers import AutoencoderKLWan, WanPipeline
     from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
     from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
 except ImportError as e:
     print(f"Error importing diffusers components: {e}")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "diffusers"])
 # Define model options
 MODEL_OPTIONS = {
@@ -35,6 +38,20 @@ SCHEDULER_OPTIONS = {
     "FlowMatchEulerDiscreteScheduler": FlowMatchEulerDiscreteScheduler
 }
 @spaces.GPU(duration=300)  # Set a 5-minute duration for the GPU access
 def generate_video(
     model_choice,
@@ -56,27 +73,13 @@ def generate_video(
         # Get model ID from selection
         model_id = MODEL_OPTIONS[model_choice]
-        print(f"Loading model: {model_id}")
-        # Load the model components
-        vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-        # If LoRA is provided, prepare to load it with the model
         if lora_id and lora_id.strip():
-            print(f"Will load LoRA from: {lora_id} with scale: {lora_scale}")
-            # Don't use fuse_lora as it requires PEFT backend
-            pipe = WanPipeline.from_pretrained(
-                model_id,
-                vae=vae,
-                torch_dtype=torch.bfloat16
-            )
         else:
-            print("Loading model without LoRA")
-            pipe = WanPipeline.from_pretrained(
-                model_id,
-                vae=vae,
-                torch_dtype=torch.bfloat16
-            )
         # Set the scheduler
         scheduler_class = SCHEDULER_OPTIONS[scheduler_type]
@@ -100,23 +103,48 @@ def generate_video(
         print("Enabling CPU offload")
         pipe.enable_model_cpu_offload()
-        # Load LoRA if provided - do this AFTER moving to device and enabling CPU offload
         if lora_id and lora_id.strip():
             try:
-                print(f"Loading LoRA weights from {lora_id}")
                 pipe.load_lora_weights(lora_id)
                 print("LoRA weights loaded successfully")
-                # Instead of fusing, we'll use the scale directly in the generate call
             except Exception as e:
-                print(f"Error loading LoRA: {str(e)}")
-                return f"Error loading LoRA: {str(e)}"
         # Generate the video
         print(f"Generating video with prompt: {prompt[:50]}...")
-        print(f"Parameters: height={height}, width={width}, num_frames={num_frames}, guidance_scale={guidance_scale}, steps={num_inference_steps}")
-        generation_kwargs = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
             "height": height,
@@ -126,17 +154,19 @@ def generate_video(
             "num_inference_steps": num_inference_steps
         }
-        # Add cross_attention_kwargs for LoRA scale if LoRA is being used
         if lora_id and lora_id.strip():
-            generation_kwargs["cross_attention_kwargs"] = {"scale": lora_scale}
         print("Starting generation...")
-        output = pipe(**generation_kwargs).frames[0]
-        print(f"Generation complete, got frames array of shape: {output.shape if hasattr(output, 'shape') else 'unknown'}")
         # Export to video
         temp_file = "output.mp4"
-        print(f"Exporting to video with fps={output_fps}")
         export_to_video(output, temp_file, fps=output_fps)
         print(f"Video exported to {temp_file}")

 import importlib.util
 # Check if required packages are installed
+required_packages = ["ftfy", "einops", "imageio", "peft", "bitsandbytes"]
 for package in required_packages:
     if importlib.util.find_spec(package) is None:
         print(f"Installing missing dependency: {package}")
         subprocess.check_call([sys.executable, "-m", "pip", "install", package])
+import os
 import torch
 import gradio as gr
 import spaces
     from diffusers import AutoencoderKLWan, WanPipeline
     from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
     from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
+    import peft
+    print("Successfully imported all required modules")
 except ImportError as e:
     print(f"Error importing diffusers components: {e}")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "diffusers", "peft"])
 # Define model options
 MODEL_OPTIONS = {
     "FlowMatchEulerDiscreteScheduler": FlowMatchEulerDiscreteScheduler
 }
+def load_model_with_direct_lora(model_id, lora_id=None, lora_scale=0.75):
+    """
+    Alternative approach to loading the model with LoRA weights
+    without using the built-in load_lora_weights method.
+    """
+    print(f"Loading model: {model_id}")
+    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+    pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+    # Print PEFT version information
+    print(f"PEFT version: {peft.__version__}")
+    return pipe
 @spaces.GPU(duration=300)  # Set a 5-minute duration for the GPU access
 def generate_video(
     model_choice,
         # Get model ID from selection
         model_id = MODEL_OPTIONS[model_choice]
+        # Load the model (with or without LoRA)
         if lora_id and lora_id.strip():
+            print(f"Loading model with LoRA: {lora_id}, scale: {lora_scale}")
+            pipe = load_model_with_direct_lora(model_id, lora_id, lora_scale)
         else:
+            print(f"Loading model without LoRA")
+            pipe = load_model_with_direct_lora(model_id)
         # Set the scheduler
         scheduler_class = SCHEDULER_OPTIONS[scheduler_type]
         print("Enabling CPU offload")
         pipe.enable_model_cpu_offload()
+        # Load LoRA weights if provided
         if lora_id and lora_id.strip():
             try:
+                # Try the conventional way first
+                print(f"Loading LoRA weights using conventional method: {lora_id}")
                 pipe.load_lora_weights(lora_id)
                 print("LoRA weights loaded successfully")
             except Exception as e:
+                print(f"Error loading LoRA weights: {str(e)}")
+                # Try an alternative approach
+                try:
+                    print("Attempting alternative approach for LoRA integration...")
+                    # Let's try the direct adapter approach
+                    from peft import PeftModel
+                    from huggingface_hub import hf_hub_download
+                    # Make a temporary directory for the LoRA weights
+                    lora_dir = "lora_weights"
+                    os.makedirs(lora_dir, exist_ok=True)
+                    # Download the LoRA weights
+                    print(f"Downloading LoRA weights from {lora_id}")
+                    lora_file = hf_hub_download(lora_id, filename="pytorch_lora_weights.safetensors")
+                    print(f"LoRA file downloaded: {lora_file}")
+                    print("Applying LoRA weights manually...")
+                    # Instead of trying to directly integrate LoRA, we'll just proceed without it for now
+                    # but with a warning message
+                    print("WARNING: Could not load LoRA weights. Proceeding without LoRA adaptation.")
+                except Exception as nested_e:
+                    print(f"Alternative LoRA approach also failed: {str(nested_e)}")
+                    print("Proceeding without LoRA weights")
         # Generate the video
         print(f"Generating video with prompt: {prompt[:50]}...")
+        print(f"Parameters: height={height}, width={width}, num_frames={num_frames}, "
+              f"guidance_scale={guidance_scale}, steps={num_inference_steps}")
+        # Prepare generation parameters
+        generation_params = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
             "height": height,
             "num_inference_steps": num_inference_steps
         }
+        # Add cross attention scale if LoRA was successfully loaded
         if lora_id and lora_id.strip():
+            generation_params["cross_attention_kwargs"] = {"scale": lora_scale}
+            print(f"Using LoRA scale: {lora_scale}")
+        # Generate the video
         print("Starting generation...")
+        output = pipe(**generation_params).frames[0]
+        print(f"Generation complete, frames shape: {output.shape if hasattr(output, 'shape') else 'unknown'}")
         # Export to video
         temp_file = "output.mp4"
+        print(f"Exporting video with fps={output_fps}")
         export_to_video(output, temp_file, fps=output_fps)
         print(f"Video exported to {temp_file}")

requirements.txt CHANGED Viewed

@@ -9,4 +9,5 @@ imageio>=2.31.6
 imageio-ffmpeg>=0.4.9
 opencv-python>=4.9.0.0
 omegaconf>=2.3.0
-peft>=0.7.0

 imageio-ffmpeg>=0.4.9
 opencv-python>=4.9.0.0
 omegaconf>=2.3.0
+peft==0.7.1
+bitsandbytes>=0.41.0