Update app.py
app.py CHANGED
@@ -114,8 +114,14 @@ def create_video(frames, fps):
     return 'movie.mp4'
 
 
-def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
-
+def run_grounded_sam(input_image, text_prompt, task_type, background_prompt):
+    background_type = "generated_by_text"
+    box_threshold = 0.25
+    text_threshold = 0.25
+    iou_threshold = 0.5
+    scribble_mode = "split"
+    guidance_mode = "alpha"
+
     #global groundingdino_model, sam_predictor, generator
 
     # make dir
@@ -276,9 +282,9 @@ def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, bac
     green_img = alpha_pred[..., None] * image_ori + (1 - alpha_pred[..., None]) * np.array([PALETTE_back], dtype='uint8')
     green_img = np.uint8(green_img)
     #return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
-    return alpha_rgb
+    return com_img, alpha_rgb
 
-def infer(video_in, trim_value, prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
+def infer(video_in, trim_value, prompt, background_prompt):
     print(prompt)
     break_vid = get_frames(video_in)
 
@@ -299,11 +305,10 @@ def infer(video_in, trim_value, prompt, background_prompt, background_type, box_
         # Convert the image to a NumPy array
         image_array = np.array(to_numpy_i)
 
-
         matte_img = run_grounded_sam(image_array, prompt, "text", background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)
-
-        matte_img = Image.fromarray(matte_img)
-
+
+        matte_img= Image.fromarray(matte_img[1])
+
 
         # exporting the image
         matte_img.save(f"result_img-{i}.jpg")
@@ -360,26 +365,26 @@ if __name__ == "__main__":
             #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
             #task_type = "text"
             text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
-            background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
+            #background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
             background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
 
             run_button = gr.Button(label="Run")
-            with gr.Accordion("Advanced options", open=False):
-                box_threshold = gr.Slider(
-                    label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
-                )
-                text_threshold = gr.Slider(
-                    label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
-                )
-                iou_threshold = gr.Slider(
-                    label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
-                )
-                scribble_mode = gr.Dropdown(
-                    ["merge", "split"], value="split", label="scribble_mode"
-                )
-                guidance_mode = gr.Dropdown(
-                    ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
-                )
+            #with gr.Accordion("Advanced options", open=False):
+            #    box_threshold = gr.Slider(
+            #        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
+            #    )
+            #    text_threshold = gr.Slider(
+            #        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
+            #    )
+            #    iou_threshold = gr.Slider(
+            #        label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
+            #    )
+            #    scribble_mode = gr.Dropdown(
+            #        ["merge", "split"], value="split", label="scribble_mode"
+            #    )
+            #    guidance_mode = gr.Dropdown(
+            #        ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
+            #    )
 
             with gr.Column():
                 #gallery = gr.Gallery(
@@ -388,7 +393,7 @@ if __name__ == "__main__":
             video_out = gr.Video()
 
     run_button.click(fn=infer, inputs=[
-        video_in, trim_in, text_prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode], outputs=video_out)
+        video_in, trim_in, text_prompt, background_prompt], outputs=video_out)
 
     block.launch(debug=args.debug, share=args.share, show_error=True)
     #block.queue(concurrency_count=100)
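Taken together, the changes collapse the matting controls into run_grounded_sam itself: the function now takes four arguments, pins the former advanced options to the constants added just below the new signature, and returns a (com_img, alpha_rgb) pair, of which infer keeps only the alpha matte via matte_img[1] (the call site inside infer is left in the old ten-argument form in this commit). A minimal sketch of the resulting per-frame flow, assuming run_grounded_sam as redefined above; matte_frames and its frame list are illustrative and not part of app.py, and frame extraction via get_frames is omitted:

import numpy as np
from PIL import Image

def matte_frames(frames, prompt, background_prompt):
    """Run the commit's four-argument matting call on each frame and save the mattes."""
    saved = []
    for i, frame in enumerate(frames):
        image_array = np.array(frame)
        # run_grounded_sam is the app's function as redefined in this commit;
        # it now returns (com_img, alpha_rgb), and only the matte is kept here.
        com_img, alpha_rgb = run_grounded_sam(image_array, prompt, "text", background_prompt)
        matte_img = Image.fromarray(alpha_rgb)  # assumes alpha_rgb is a uint8 image array
        matte_img.save(f"result_img-{i}.jpg")
        saved.append(f"result_img-{i}.jpg")
    return saved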
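On the UI side, the click handler is trimmed to match infer's new four-parameter signature, and the accordion of sliders is kept as comments rather than deleted, so the controls stay easy to restore. A hypothetical, self-contained reconstruction of that wiring; the trim_in slider and the infer stub are placeholders whose real definitions sit outside this diff:

import gradio as gr

def infer(video_in, trim_value, prompt, background_prompt):
    # stand-in for the app's infer(), which renders and returns the output video path
    return "movie.mp4"

with gr.Blocks() as block:
    video_in = gr.Video()
    trim_in = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Trim (s)")  # assumed definition
    text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
    background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
    run_button = gr.Button("Run")
    video_out = gr.Video()
    run_button.click(fn=infer, inputs=[video_in, trim_in, text_prompt, background_prompt], outputs=video_out)

block.launch(show_error=True)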