Spaces:

LPX55
/

Lightning-Painter-Multitool

Running on Zero

App Files Files Community

LPX55 committed on Mar 29

Commit

8fa6d2e

verified ·

1 Parent(s): 76f2c71

Update sam2_mask.py

Browse files

Files changed (1) hide show

sam2_mask.py +51 -20

sam2_mask.py CHANGED Viewed

@@ -39,7 +39,15 @@ def get_point(point_type, tracking_points, trackings_input_label, first_frame_pa
     transparent_layer = Image.fromarray(transparent_layer, 'RGBA')
     selected_point_map = Image.alpha_composite(transparent_background, transparent_layer)
     return tracking_points, trackings_input_label, selected_point_map
 def show_mask(mask, ax, random_color=False, borders=True):
     if random_color:
         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
@@ -102,8 +110,12 @@ def show_masks(image, masks, scores, point_coords=None, box_coords=None, input_l
 @spaces.GPU()
 def sam_process(original_image, points, labels):
     print(f"Points: {points}")
     print(f"Labels: {labels}")
     if not points or not labels:
         print("No points or labels provided, returning None")
         return None
@@ -111,15 +123,26 @@ def sam_process(original_image, points, labels):
     image = np.array(original_image)
     predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-large")
     predictor.set_image(image)
-    input_point = np.array(points)
-    input_label = np.array(labels)
-    masks, scores, _ = predictor.predict(input_point, input_label, multimask_output=False)
     sorted_indices = np.argsort(scores)[::-1]
     masks = masks[sorted_indices]
-    # Generate mask image
-    mask = masks[0] * 255
-    mask_image = Image.fromarray(mask.astype(np.uint8))
-    return mask_image
 def create_sam2_tab():
     first_frame = gr.State()  # Tracks original image
@@ -127,24 +150,32 @@ def create_sam2_tab():
     trackings_input_label = gr.State([])
     with gr.Column():
-        gr.Markdown("# SAM2 Image Predictor")
-        gr.Markdown("1. Upload your image\n2. Click points to mask\n3. Submit")
-        points_map = gr.Image(label="Points Map", type="pil", interactive=True)
-        input_image = gr.Image(type="pil", visible=False)  # Original image
         with gr.Row():
-            point_type = gr.Radio(["include", "exclude"], value="include", label="Point Type")
-            clear_button = gr.Button("Clear Points")
-        submit_button = gr.Button("Submit")
-        output_image = gr.Image("Segmented Output")
         # Event handlers
         points_map.upload(
             lambda img: (img, img, [], []),
             inputs=points_map,
-            outputs=[input_image, first_frame, tracking_points, trackings_input_label]
         )
         clear_button.click(
@@ -161,8 +192,8 @@ def create_sam2_tab():
         submit_button.click(
             sam_process,
-            inputs=[input_image, tracking_points, trackings_input_label],
             outputs=output_image
         )
-    return input_image, points_map, output_image

     transparent_layer = Image.fromarray(transparent_layer, 'RGBA')
     selected_point_map = Image.alpha_composite(transparent_background, transparent_layer)
     return tracking_points, trackings_input_label, selected_point_map
+# Enable bfloat16 autocast for the whole module: the context is entered here and never exited
+torch.autocast(device_type="cuda", dtype=torch.bfloat16).__enter__()
+if torch.cuda.get_device_properties(0).major >= 8:
+    # Enable TF32 (TensorFloat-32) for matmul and cuDNN on GPUs with compute capability major >= 8, i.e. Ampere or newer (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
 def show_mask(mask, ax, random_color=False, borders=True):
     if random_color:
         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
 @spaces.GPU()
 def sam_process(original_image, points, labels):
     print(f"Points: {points}")
     print(f"Labels: {labels}")
+    image = Image.open(original_image)
+    image = np.array(image.convert("RGB"))
     if not points or not labels:
         print("No points or labels provided, returning None")
         return None
     image = np.array(original_image)
     predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-large")
     predictor.set_image(image)
+    input_point = np.array(points.value)
+    input_label = np.array(labels.value)
+    print(predictor._features["image_embed"].shape, predictor._features["image_embed"][-1].shape)
+    masks, scores, logits = predictor.predict(
+        point_coords=input_point,
+        point_labels=input_label,
+        multimask_output=False,
+    )
     sorted_indices = np.argsort(scores)[::-1]
     masks = masks[sorted_indices]
+    scores = scores[sorted_indices]
+    logits = logits[sorted_indices]
+    print(masks.shape)
+    results, mask_results = show_masks(image, masks, scores, point_coords=input_point, input_labels=input_label, borders=True)
+    print(results)
+    return results[0], mask_results[0]
 def create_sam2_tab():
     first_frame = gr.State()  # Tracks original image
     trackings_input_label = gr.State([])
     with gr.Column():
         with gr.Row():
+            with gr.Column():
+                sam_input_image = gr.Image(label="input image", interactive=False, type="filepath", visible=False)
+                points_map = gr.Image(
+                    label="points map",
+                    type="filepath",
+                    interactive=True
+                )
+                with gr.Row():
+                    point_type = gr.Radio(["include", "exclude"], value="include", label="Point Type")
+                    clear_button = gr.Button("Clear Points")
+                submit_button = gr.Button("Submit")
+            with gr.Column():
+                output_image = gr.Image("Segmented Output")
+                output_result_mask = gr.Image()
         # Event handlers
         points_map.upload(
             lambda img: (img, img, [], []),
             inputs=points_map,
+            outputs=[sam_input_image, first_frame, tracking_points, trackings_input_label]
         )
         clear_button.click(
         submit_button.click(
             sam_process,
+            inputs=[sam_input_image, tracking_points, trackings_input_label],
             outputs=output_image
         )
+    return sam_input_image, points_map, output_image