Update app.py
app.py CHANGED
@@ -114,8 +114,14 @@ def create_video(frames, fps):
     return 'movie.mp4'
 
 
-def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
-
+def run_grounded_sam(input_image, text_prompt, task_type, background_prompt):
+    background_type = "generated_by_text"
+    box_threshold = 0.25
+    text_threshold = 0.25
+    iou_threshold = 0.5
+    scribble_mode = "split"
+    guidance_mode = "alpha"
+
     #global groundingdino_model, sam_predictor, generator
 
     # make dir
@@ -276,9 +282,9 @@ def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, bac
     green_img = alpha_pred[..., None] * image_ori + (1 - alpha_pred[..., None]) * np.array([PALETTE_back], dtype='uint8')
     green_img = np.uint8(green_img)
     #return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
-    return alpha_rgb
+    return com_img, alpha_rgb
 
-def infer(video_in, trim_value, prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
+def infer(video_in, trim_value, prompt, background_prompt):
     print(prompt)
     break_vid = get_frames(video_in)
 
@@ -299,11 +305,10 @@ def infer(video_in, trim_value, prompt, background_prompt, background_type, box_
         # Convert the image to a NumPy array
         image_array = np.array(to_numpy_i)
 
-
         matte_img = run_grounded_sam(image_array, prompt, "text", background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)
-
-        matte_img = Image.fromarray(matte_img)
-
+
+        matte_img= Image.fromarray(matte_img[1])
+
 
         # exporting the image
         matte_img.save(f"result_img-{i}.jpg")
@@ -360,26 +365,26 @@ if __name__ == "__main__":
             #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
             #task_type = "text"
             text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
-            background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
+            #background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
             background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
 
             run_button = gr.Button(label="Run")
-            with gr.Accordion("Advanced options", open=False):
-                box_threshold = gr.Slider(
-                    label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
-                )
-                text_threshold = gr.Slider(
-                    label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
-                )
-                iou_threshold = gr.Slider(
-                    label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
-                )
-                scribble_mode = gr.Dropdown(
-                    ["merge", "split"], value="split", label="scribble_mode"
-                )
-                guidance_mode = gr.Dropdown(
-                    ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
-                )
+            #with gr.Accordion("Advanced options", open=False):
+            #    box_threshold = gr.Slider(
+            #        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
+            #    )
+            #    text_threshold = gr.Slider(
+            #        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
+            #    )
+            #    iou_threshold = gr.Slider(
+            #        label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
+            #    )
+            #    scribble_mode = gr.Dropdown(
+            #        ["merge", "split"], value="split", label="scribble_mode"
+            #    )
+            #    guidance_mode = gr.Dropdown(
+            #        ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
+            #    )
 
             with gr.Column():
                 #gallery = gr.Gallery(
@@ -388,7 +393,7 @@ if __name__ == "__main__":
             video_out = gr.Video()
 
     run_button.click(fn=infer, inputs=[
-        video_in, trim_in, text_prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode], outputs=video_out)
+        video_in, trim_in, text_prompt, background_prompt], outputs=video_out)
 
     block.launch(debug=args.debug, share=args.share, show_error=True)
     #block.queue(concurrency_count=100)
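Taken together, the changes collapse the matting controls into run_grounded_sam itself: the function now takes four arguments, pins the former advanced options to the constants added just below the new signature, and returns a (com_img, alpha_rgb) pair, of which infer keeps only the alpha matte via matte_img[1] (the call site inside infer is left in the old ten-argument form in this commit). A minimal sketch of the resulting per-frame flow, assuming run_grounded_sam as redefined above; matte_frames and its frame list are illustrative and not part of app.py, and frame extraction via get_frames is omitted:

import numpy as np
from PIL import Image

def matte_frames(frames, prompt, background_prompt):
    """Run the commit's four-argument matting call on each frame and save the mattes."""
    saved = []
    for i, frame in enumerate(frames):
        image_array = np.array(frame)
        # run_grounded_sam is the app's function as redefined in this commit;
        # it now returns (com_img, alpha_rgb), and only the matte is kept here.
        com_img, alpha_rgb = run_grounded_sam(image_array, prompt, "text", background_prompt)
        matte_img = Image.fromarray(alpha_rgb)  # assumes alpha_rgb is a uint8 image array
        matte_img.save(f"result_img-{i}.jpg")
        saved.append(f"result_img-{i}.jpg")
    return saved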
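On the UI side, the click handler is trimmed to match infer's new four-parameter signature, and the accordion of sliders is kept as comments rather than deleted, so the controls stay easy to restore. A hypothetical, self-contained reconstruction of that wiring; the trim_in slider and the infer stub are placeholders whose real definitions sit outside this diff:

import gradio as gr

def infer(video_in, trim_value, prompt, background_prompt):
    # stand-in for the app's infer(), which renders and returns the output video path
    return "movie.mp4"

with gr.Blocks() as block:
    video_in = gr.Video()
    trim_in = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Trim (s)")  # assumed definition
    text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
    background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
    run_button = gr.Button("Run")
    video_out = gr.Video()
    run_button.click(fn=infer, inputs=[video_in, trim_in, text_prompt, background_prompt], outputs=video_out)

block.launch(show_error=True)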