chongzhou committed
Commit db1d5c6 · 1 Parent(s): 3ccde9c
Files changed (1)
  1. app.py +41 -44
app.py CHANGED
@@ -337,57 +337,54 @@ def propagate_to_all(
     input_points,
     inference_state,
 ):
+    torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
     predictor = build_sam2_video_predictor(model_cfg, sam2_checkpoint, device="cuda")
     if torch.cuda.get_device_properties(0).major >= 8:
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
-    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
-        if len(input_points) == 0 or video_in is None or inference_state is None:
-            return None
-        # run propagation throughout the video and collect the results in a dict
-        video_segments = (
-            {}
-        )  # video_segments contains the per-frame segmentation results
-        print("starting propagate_in_video")
-        for out_frame_idx, out_obj_ids, out_mask_logits in predictor.propagate_in_video(
-            inference_state
-        ):
-            video_segments[out_frame_idx] = {
-                out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
-                for i, out_obj_id in enumerate(out_obj_ids)
-            }
-
-        # obtain the segmentation results every few frames
-        vis_frame_stride = 1
-
-        output_frames = []
-        for out_frame_idx in range(0, len(video_segments), vis_frame_stride):
-            transparent_background = Image.fromarray(all_frames[out_frame_idx]).convert(
-                "RGBA"
-            )
-            out_mask = video_segments[out_frame_idx][OBJ_ID]
-            mask_image = show_mask(out_mask)
-            output_frame = Image.alpha_composite(transparent_background, mask_image)
-            output_frame = np.array(output_frame)
-            output_frames.append(output_frame)
-
-        torch.cuda.empty_cache()
-
-        # Create a video clip from the image sequence
-        original_fps = get_video_fps(video_in)
-        fps = original_fps  # Frames per second
-        clip = ImageSequenceClip(output_frames, fps=fps)
-        # Write the result to a file
-        unique_id = datetime.now().strftime("%Y%m%d%H%M%S")
-        final_vid_output_path = f"output_video_{unique_id}.mp4"
-        final_vid_output_path = os.path.join(
-            tempfile.gettempdir(), final_vid_output_path
+
+    if len(input_points) == 0 or video_in is None or inference_state is None:
+        return None
+    # run propagation throughout the video and collect the results in a dict
+    video_segments = {}  # video_segments contains the per-frame segmentation results
+    print("starting propagate_in_video")
+    for out_frame_idx, out_obj_ids, out_mask_logits in predictor.propagate_in_video(
+        inference_state
+    ):
+        video_segments[out_frame_idx] = {
+            out_obj_id: (out_mask_logits[i] > 0.0).cpu().numpy()
+            for i, out_obj_id in enumerate(out_obj_ids)
+        }
+
+    # obtain the segmentation results every few frames
+    vis_frame_stride = 1
+
+    output_frames = []
+    for out_frame_idx in range(0, len(video_segments), vis_frame_stride):
+        transparent_background = Image.fromarray(all_frames[out_frame_idx]).convert(
+            "RGBA"
         )
+        out_mask = video_segments[out_frame_idx][OBJ_ID]
+        mask_image = show_mask(out_mask)
+        output_frame = Image.alpha_composite(transparent_background, mask_image)
+        output_frame = np.array(output_frame)
+        output_frames.append(output_frame)
+
+    torch.cuda.empty_cache()
+
+    # Create a video clip from the image sequence
+    original_fps = get_video_fps(video_in)
+    fps = original_fps  # Frames per second
+    clip = ImageSequenceClip(output_frames, fps=fps)
+    # Write the result to a file
+    unique_id = datetime.now().strftime("%Y%m%d%H%M%S")
+    final_vid_output_path = f"output_video_{unique_id}.mp4"
+    final_vid_output_path = os.path.join(tempfile.gettempdir(), final_vid_output_path)
 
-        # Write the result to a file
-        clip.write_videofile(final_vid_output_path, codec="libx264")
+    # Write the result to a file
+    clip.write_videofile(final_vid_output_path, codec="libx264")
 
-        return gr.update(value=final_vid_output_path)
+    return gr.update(value=final_vid_output_path)
 
 
 def update_ui():
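
The substantive change in this commit: bfloat16 autocast moves from a with block that scoped the whole function body to a bare __enter__() call at the top of propagate_to_all, which enables autocast for the current thread and never exits it; the body is de-indented accordingly. A minimal standalone sketch contrasting the two patterns (not from this repo; model and x are placeholders, and it assumes a CUDA-capable PyTorch build):

import torch

model = torch.nn.Linear(8, 8).cuda()
x = torch.randn(2, 8, device="cuda")

# Scoped (the old pattern): autocast is active only inside the block.
with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
    y_scoped = model(x)  # computed in bfloat16
y_after = model(x)       # back to float32 out here

# Unscoped (the new pattern): __enter__() with no matching __exit__()
# leaves bfloat16 autocast enabled for the rest of the thread.
torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
y_global = model(x)      # still computed under autocast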
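
The allow_tf32 switches just above the autocast call are gated on compute capability >= 8 because TensorFloat-32 matmul/cuDNN modes only exist on Ampere-and-newer GPUs. A quick standalone check of what a given device reports (a sketch, not app code):

import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    print(f"compute capability {major}.{minor}; TF32 eligible: {major >= 8}")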
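
show_mask is defined elsewhere in app.py and is not part of this diff; it returns the RGBA overlay that Image.alpha_composite blends onto each frame. A hypothetical sketch of what such a helper can look like (the name show_mask_sketch, the red color, and the 128 alpha are assumptions, not the app's actual values):

import numpy as np
from PIL import Image

def show_mask_sketch(mask: np.ndarray, color=(255, 0, 0, 128)) -> Image.Image:
    # Collapse a (1, H, W) or (H, W) boolean mask to (H, W).
    mask = np.squeeze(mask).astype(bool)
    # Masked pixels get a semi-transparent color; the rest stay fully transparent.
    rgba = np.zeros((*mask.shape, 4), dtype=np.uint8)
    rgba[mask] = color
    return Image.fromarray(rgba, mode="RGBA")

# Usage mirroring the loop in the diff:
# overlay = Image.alpha_composite(transparent_background, show_mask_sketch(out_mask))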