geowizard-e2e-ft

Running

App Files Community

x10z committed on May 3

Commit

0b350a4

verified ·

1 Parent(s): 378bb55

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -5

app.py CHANGED Viewed

@@ -35,12 +35,15 @@ pipe.unet.eval()
 # UI texts
 title = "# End-to-End Fine-Tuned GeoWizard Video"
-description = """ Please refer to our [paper](https://arxiv.org/abs/2409.11355) and [GitHub](https://vision.rwth-aachen.de/diffusion-e2e-ft) for more details."""
 @spaces.GPU
 def predict(image: Image.Image, processing_res_choice: int):
     """
     Single-frame prediction wrapped for GPU execution.
     """
     with torch.no_grad():
         return pipe(
@@ -67,7 +70,7 @@ def on_submit_video(video_path: str, processing_res_choice: int):
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    # Create temporary output video files
     tmp_depth = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     tmp_normal = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
@@ -80,13 +83,14 @@ def on_submit_video(video_path: str, processing_res_choice: int):
         if not ret:
             break
-        # Convert BGR to RGB PIL image
         rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(rgb)
         # Run prediction
-time_error
-        depth_np, depth_colored, normal_np, normal_colored = predict(pil_image, processing_res_choice)
         # Write depth frame
         depth_frame = np.array(depth_colored)
@@ -103,6 +107,7 @@ time_error
     out_depth.release()
     out_normal.release()
     return tmp_depth.name, tmp_normal.name
 # Build Gradio interface

 # UI texts
 title = "# End-to-End Fine-Tuned GeoWizard Video"
+description = """
+Please refer to our [paper](https://arxiv.org/abs/2409.11355) and [GitHub](https://vision.rwth-aachen.de/diffusion-e2e-ft) for more details.
+"""
 @spaces.GPU
 def predict(image: Image.Image, processing_res_choice: int):
     """
     Single-frame prediction wrapped for GPU execution.
+    Returns a DepthNormalPipelineOutput with attributes depth_colored and normal_colored.
     """
     with torch.no_grad():
         return pipe(
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    # Temporary output files
     tmp_depth = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     tmp_normal = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         if not ret:
             break
+        # Convert BGR to RGB and to PIL
         rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(rgb)
         # Run prediction
+        result = predict(pil_image, processing_res_choice)
+        depth_colored = result.depth_colored
+        normal_colored = result.normal_colored
         # Write depth frame
         depth_frame = np.array(depth_colored)
     out_depth.release()
     out_normal.release()
+    # Return paths for download
     return tmp_depth.name, tmp_normal.name
 # Build Gradio interface