fffiloni committed
Commit 3d8a88c · 1 Parent(s): 0243396

Update app.py

Files changed (1)
  1. app.py +31 -26
app.py CHANGED
@@ -114,8 +114,14 @@ def create_video(frames, fps):
     return 'movie.mp4'
 
 
-def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
-
+def run_grounded_sam(input_image, text_prompt, task_type, background_prompt):
+    background_type = "generated_by_text"
+    box_threshold = 0.25
+    text_threshold = 0.25
+    iou_threshold = 0.5
+    scribble_mode = "split"
+    guidance_mode = "alpha"
+
     #global groundingdino_model, sam_predictor, generator
 
     # make dir
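The six assignments added here pin the former UI parameters to fixed values inside the function. An alternative that keeps the same four-argument call sites but leaves the knobs reachable is keyword defaults; the following is a minimal sketch of that variant, not the committed code, and the elided body stands for app.py's actual matting logic:

# Hypothetical variant: keyword defaults instead of hardcoded locals.
# Calls that pass only the four positional arguments behave exactly like
# the committed version; scripts and tests can still override a knob.
def run_grounded_sam(input_image, text_prompt, task_type, background_prompt,
                     background_type="generated_by_text",
                     box_threshold=0.25, text_threshold=0.25,
                     iou_threshold=0.5, scribble_mode="split",
                     guidance_mode="alpha"):
    ...  # matting body unchanged from app.py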
@@ -276,9 +282,9 @@ def run_grounded_sam(input_image, text_prompt, task_type, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
     green_img = alpha_pred[..., None] * image_ori + (1 - alpha_pred[..., None]) * np.array([PALETTE_back], dtype='uint8')
     green_img = np.uint8(green_img)
     #return [(com_img, 'composite with background'), (green_img, 'green screen'), (alpha_rgb, 'alpha matte')]
-    return alpha_rgb
+    return com_img, alpha_rgb
 
-def infer(video_in, trim_value, prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
+def infer(video_in, trim_value, prompt, background_prompt):
     print(prompt)
     break_vid = get_frames(video_in)
 
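run_grounded_sam now hands back a pair, the composite image and the alpha matte, instead of the matte alone. A short usage sketch under the new four-argument signature; unpacking by name avoids the bare tuple index used later in infer:

# The updated function returns (com_img, alpha_rgb); unpack both.
com_img, alpha_rgb = run_grounded_sam(image_array, prompt, "text", background_prompt)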
@@ -299,11 +305,10 @@ def infer(video_in, trim_value, prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode):
         # Convert the image to a NumPy array
         image_array = np.array(to_numpy_i)
 
-
         matte_img = run_grounded_sam(image_array, prompt, "text", background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode)
-        #print(pix2pix_img)u
-        matte_img = Image.fromarray(matte_img)
-        #rgb_im = image.convertt("RGB")
+
+        matte_img= Image.fromarray(matte_img[1])
+
 
         # exporting the image
         matte_img.save(f"result_img-{i}.jpg")
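The [1] index selects alpha_rgb from the returned (com_img, alpha_rgb) pair; [0] would select the composite. Note that the unchanged context line above still passes the pre-commit ten-argument form; under the new four-argument signature the per-frame step reduces to this sketch (Image and the loop variable i as in app.py):

# Equivalent, more explicit spelling: keep the alpha matte, drop the composite.
com_img, alpha_rgb = run_grounded_sam(image_array, prompt, "text", background_prompt)
matte_img = Image.fromarray(alpha_rgb)
matte_img.save(f"result_img-{i}.jpg")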
@@ -360,26 +365,26 @@ if __name__ == "__main__":
             #task_type = gr.Dropdown(["scribble_point", "scribble_box", "text"], value="text", label="Prompt type")
             #task_type = "text"
             text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
-            background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
+            #background_type = gr.Dropdown(["generated_by_text", "real_world_sample"], value="generated_by_text", label="Background type")
             background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
 
             run_button = gr.Button(label="Run")
-            with gr.Accordion("Advanced options", open=False):
-                box_threshold = gr.Slider(
-                    label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
-                )
-                text_threshold = gr.Slider(
-                    label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
-                )
-                iou_threshold = gr.Slider(
-                    label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
-                )
-                scribble_mode = gr.Dropdown(
-                    ["merge", "split"], value="split", label="scribble_mode"
-                )
-                guidance_mode = gr.Dropdown(
-                    ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
-                )
+            #with gr.Accordion("Advanced options", open=False):
+            #    box_threshold = gr.Slider(
+            #        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
+            #    )
+            #    text_threshold = gr.Slider(
+            #        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05
+            #    )
+            #    iou_threshold = gr.Slider(
+            #        label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.5, step=0.05
+            #    )
+            #    scribble_mode = gr.Dropdown(
+            #        ["merge", "split"], value="split", label="scribble_mode"
+            #    )
+            #    guidance_mode = gr.Dropdown(
+            #        ["mask", "alpha"], value="alpha", label="guidance_mode", info="mask guidance is for complex scenes with multiple instances, alpha guidance is for simple scene with single instance"
+            #    )
 
         with gr.Column():
             #gallery = gr.Gallery(
@@ -388,7 +393,7 @@ if __name__ == "__main__":
             video_out = gr.Video()
 
     run_button.click(fn=infer, inputs=[
-        video_in, trim_in, text_prompt, background_prompt, background_type, box_threshold, text_threshold, iou_threshold, scribble_mode, guidance_mode], outputs=video_out)
+        video_in, trim_in, text_prompt, background_prompt], outputs=video_out)
 
     block.launch(debug=args.debug, share=args.share, show_error=True)
     #block.queue(concurrency_count=100)
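With the accordion gone, the click handler forwards only the four inputs infer still accepts. A self-contained sketch of the same wiring, with a stub in place of the real infer; the trim_in slider is defined elsewhere in app.py, so its settings here are assumptions:

import gradio as gr

def infer(video_in, trim_value, prompt, background_prompt):
    # Stub standing in for app.py's real frame-by-frame matting loop.
    return video_in

with gr.Blocks() as block:
    with gr.Row():
        with gr.Column():
            video_in = gr.Video()
            trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
            text_prompt = gr.Textbox(label="Text prompt", placeholder="the girl in the middle")
            background_prompt = gr.Textbox(label="Background prompt", placeholder="downtown area in New York")
            run_button = gr.Button("Run")
        with gr.Column():
            video_out = gr.Video()
    run_button.click(fn=infer, inputs=[video_in, trim_in, text_prompt, background_prompt], outputs=video_out)

block.launch()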
 