huy-ha commited on
Commit
2145817
·
1 Parent(s): 09dc5c4

add description and fast option

Browse files
Files changed (2) hide show
  1. CLIP/clip/__init__.py +12 -0
  2. app.py +55 -7
CLIP/clip/__init__.py CHANGED
@@ -30,6 +30,18 @@ saliency_configs = {
30
  {"tile_size": img_dim // 4, "stride": (img_dim // 4) // 4},
31
  ],
32
  },
 
 
 
 
 
 
 
 
 
 
 
 
33
  "chefer_et_al": lambda img_dim: {
34
  "distractor_labels": {},
35
  "horizontal_flipping": False,
 
30
  {"tile_size": img_dim // 4, "stride": (img_dim // 4) // 4},
31
  ],
32
  },
33
+ "ours_fast": lambda img_dim: {
34
+ "distractor_labels": {},
35
+ "horizontal_flipping": True,
36
+ "augmentations": 2,
37
+ "imagenet_prompt_ensemble": False,
38
+ "positive_attn_only": True,
39
+ "cropping_augmentations": [
40
+ {"tile_size": img_dim, "stride": img_dim // 4},
41
+ {"tile_size": int(img_dim * 2 / 3), "stride": int(img_dim * 2 / 3) // 4},
42
+ {"tile_size": img_dim // 2, "stride": (img_dim // 2) // 4},
43
+ ],
44
+ },
45
  "chefer_et_al": lambda img_dim: {
46
  "distractor_labels": {},
47
  "horizontal_flipping": False,
app.py CHANGED
@@ -4,7 +4,7 @@ from CLIP.clip import ClipWrapper, saliency_configs
4
  from time import time
5
  from matplotlib import pyplot as plt
6
  import io
7
- from PIL import Image
8
 
9
 
10
  def plot_to_png(fig):
@@ -15,12 +15,32 @@ def plot_to_png(fig):
15
  return img
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def generate_relevancy(
19
  img: np.array, labels: str, prompt: str, saliency_config: str, subtract_mean: bool
20
  ):
21
  labels = labels.split(",")
22
  prompts = [prompt]
23
- img = np.asarray(Image.fromarray(img).resize((244 * 2, 244 * 2)))
24
  assert img.dtype == np.uint8
25
  h, w, c = img.shape
26
  grads = ClipWrapper.get_clip_saliency(
@@ -38,14 +58,17 @@ def generate_relevancy(
38
 
39
  returns = []
40
  for label_grad, label in zip(grads, labels):
41
- fig, ax = plt.subplots(1, 1)
42
  ax.axis("off")
43
  ax.imshow(img)
44
- ax.set_title(label, fontsize=12)
45
  grad = np.clip((label_grad - vmin) / (vmax - vmin), a_min=0.0, a_max=1.0)
46
  colored_grad = cmap(grad)
47
  grad = 1 - grad
48
  colored_grad[..., -1] = grad * 0.7
 
 
 
 
49
  ax.imshow(colored_grad)
50
  plt.tight_layout(pad=0)
51
  returns.append(plot_to_png(fig))
@@ -54,6 +77,10 @@ def generate_relevancy(
54
 
55
 
56
  iface = gr.Interface(
 
 
 
 
57
  fn=generate_relevancy,
58
  inputs=[
59
  gr.Image(type="numpy", label="Image"),
@@ -61,7 +88,7 @@ iface = gr.Interface(
61
  gr.Textbox(label="Prompt"),
62
  gr.Dropdown(
63
  value="ours",
64
- choices=["ours", "chefer_et_al"],
65
  label="Relevancy Configuration",
66
  ),
67
  gr.Checkbox(value=True, label="subtract mean"),
@@ -69,12 +96,33 @@ iface = gr.Interface(
69
  outputs=gr.Gallery(label="Relevancy Maps", type="numpy"),
70
  examples=[
71
  [
72
- "https://semantic-abstraction.cs.columbia.edu/downloads/matterport.png",
73
  "basketball jersey,nintendo switch,television,ping pong table,vase,fireplace,abstract painting of a vespa,carpet,wall",
74
  "a photograph of a {} in a home.",
75
  "ours",
76
  True,
77
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  ],
79
  )
80
  # iface.launch(share=True)
 
4
  from time import time
5
  from matplotlib import pyplot as plt
6
  import io
7
+ from PIL import Image, ImageDraw, ImageFont
8
 
9
 
10
  def plot_to_png(fig):
 
15
  return img
16
 
17
 
18
def add_text_to_image(
    image: np.ndarray,
    text,
    position,
    color="rgb(255, 255, 255)",
    fontsize=60,
):
    """Draw ``text`` onto a copy of ``image`` and return the result.

    Args:
        image: uint8 image array convertible via ``Image.fromarray``
            (caller passes an HxWx4 RGBA array — TODO confirm for other uses).
        text: label string to render.
        position: (x, y) pixel coordinates of the text anchor.
        color: PIL fill-color string; defaults to opaque white.
        fontsize: point size used for the TrueType font.

    Returns:
        np.ndarray: annotated copy of the input image (input is not mutated).
    """
    pil_image = Image.fromarray(image)
    draw = ImageDraw.Draw(pil_image)
    # The Lato path below only exists on hosts with that exact font package
    # installed; fall back to PIL's built-in bitmap font rather than raising
    # OSError and taking the whole demo down when it is absent.
    try:
        font = ImageFont.truetype(
            "/usr/share/fonts/truetype/lato/Lato-Medium.ttf", fontsize
        )
    except OSError:
        font = ImageFont.load_default()
    draw.text(position, text, fill=color, font=font)
    return np.array(pil_image)
36
+
37
+
38
  def generate_relevancy(
39
  img: np.array, labels: str, prompt: str, saliency_config: str, subtract_mean: bool
40
  ):
41
  labels = labels.split(",")
42
  prompts = [prompt]
43
+ img = np.asarray(Image.fromarray(img).resize((244 * 4, 244 * 4)))
44
  assert img.dtype == np.uint8
45
  h, w, c = img.shape
46
  grads = ClipWrapper.get_clip_saliency(
 
58
 
59
  returns = []
60
  for label_grad, label in zip(grads, labels):
61
+ fig, ax = plt.subplots(1, 1, figsize=(4, 4))
62
  ax.axis("off")
63
  ax.imshow(img)
 
64
  grad = np.clip((label_grad - vmin) / (vmax - vmin), a_min=0.0, a_max=1.0)
65
  colored_grad = cmap(grad)
66
  grad = 1 - grad
67
  colored_grad[..., -1] = grad * 0.7
68
+ colored_grad = add_text_to_image(
69
+ (colored_grad * 255).astype(np.uint8), text=label, position=(0, 0)
70
+ )
71
+ colored_grad = colored_grad.astype(float) / 255
72
  ax.imshow(colored_grad)
73
  plt.tight_layout(pad=0)
74
  returns.append(plot_to_png(fig))
 
77
 
78
 
79
  iface = gr.Interface(
80
+ title="Semantic Abstraction Multi-scale Relevancy Extractor",
81
+ description="""A CPU-only demo of [Semantic Abstraction](https://semantic-abstraction.cs.columbia.edu/)'s Multi-Scale Relevancy Extractor. To run GPU inference locally, use the [official codebase release](https://github.com/columbia-ai-robotics/semantic-abstraction).
82
+
83
+ This relevancy extractor builds heavily on [Chefer et al.'s codebase](https://github.com/hila-chefer/Transformer-MM-Explainability) and [CLIP on Wheels' codebase](https://cow.cs.columbia.edu/).""",
84
  fn=generate_relevancy,
85
  inputs=[
86
  gr.Image(type="numpy", label="Image"),
 
88
  gr.Textbox(label="Prompt"),
89
  gr.Dropdown(
90
  value="ours",
91
+ choices=["ours", "ours_fast", "chefer_et_al"],
92
  label="Relevancy Configuration",
93
  ),
94
  gr.Checkbox(value=True, label="subtract mean"),
 
96
  outputs=gr.Gallery(label="Relevancy Maps", type="numpy"),
97
  examples=[
98
  [
99
+ "https://semantic-abstraction.cs.columbia.edu/downloads/gameroom.png",
100
  "basketball jersey,nintendo switch,television,ping pong table,vase,fireplace,abstract painting of a vespa,carpet,wall",
101
  "a photograph of a {} in a home.",
102
  "ours",
103
  True,
104
+ ],
105
+ [
106
+ "https://semantic-abstraction.cs.columbia.edu/downloads/livingroom.png",
107
+ "monopoly boardgame set,door knob,sofa,coffee table,plant,carpet,wall",
108
+ "a photograph of a {} in a home.",
109
+ "ours",
110
+ True,
111
+ ],
112
+ [
113
+ "https://semantic-abstraction.cs.columbia.edu/downloads/fireplace.png",
114
+ "fireplace,beige armchair,candle,large indoor plant in a pot,forest painting,cheetah-patterned pillow,floor,carpet,wall",
115
+ "a photograph of a {} in a home.",
116
+ "ours",
117
+ True,
118
+ ],
119
+ [
120
+ "https://semantic-abstraction.cs.columbia.edu/downloads/walle.png",
121
+ "WALL-E,a fire extinguisher",
122
+ "a 3D render of {}.",
123
+ "ours",
124
+ True,
125
+ ],
126
  ],
127
  )
128
  # iface.launch(share=True)