Spaces:
Build error
Build error
add description and fast option
Browse files
- CLIP/clip/__init__.py +12 -0
- app.py +55 -7
CLIP/clip/__init__.py
CHANGED
@@ -30,6 +30,18 @@ saliency_configs = {
|
|
30 |
{"tile_size": img_dim // 4, "stride": (img_dim // 4) // 4},
|
31 |
],
|
32 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
"chefer_et_al": lambda img_dim: {
|
34 |
"distractor_labels": {},
|
35 |
"horizontal_flipping": False,
|
|
|
30 |
{"tile_size": img_dim // 4, "stride": (img_dim // 4) // 4},
|
31 |
],
|
32 |
},
|
33 |
+
"ours_fast": lambda img_dim: {
|
34 |
+
"distractor_labels": {},
|
35 |
+
"horizontal_flipping": True,
|
36 |
+
"augmentations": 2,
|
37 |
+
"imagenet_prompt_ensemble": False,
|
38 |
+
"positive_attn_only": True,
|
39 |
+
"cropping_augmentations": [
|
40 |
+
{"tile_size": img_dim, "stride": img_dim // 4},
|
41 |
+
{"tile_size": int(img_dim * 2 / 3), "stride": int(img_dim * 2 / 3) // 4},
|
42 |
+
{"tile_size": img_dim // 2, "stride": (img_dim // 2) // 4},
|
43 |
+
],
|
44 |
+
},
|
45 |
"chefer_et_al": lambda img_dim: {
|
46 |
"distractor_labels": {},
|
47 |
"horizontal_flipping": False,
|
app.py
CHANGED
@@ -4,7 +4,7 @@ from CLIP.clip import ClipWrapper, saliency_configs
|
|
4 |
from time import time
|
5 |
from matplotlib import pyplot as plt
|
6 |
import io
|
7 |
-
from PIL import Image
|
8 |
|
9 |
|
10 |
def plot_to_png(fig):
|
@@ -15,12 +15,32 @@ def plot_to_png(fig):
|
|
15 |
return img
|
16 |
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def generate_relevancy(
|
19 |
img: np.array, labels: str, prompt: str, saliency_config: str, subtract_mean: bool
|
20 |
):
|
21 |
labels = labels.split(",")
|
22 |
prompts = [prompt]
|
23 |
-
img = np.asarray(Image.fromarray(img).resize((244 *
|
24 |
assert img.dtype == np.uint8
|
25 |
h, w, c = img.shape
|
26 |
grads = ClipWrapper.get_clip_saliency(
|
@@ -38,14 +58,17 @@ def generate_relevancy(
|
|
38 |
|
39 |
returns = []
|
40 |
for label_grad, label in zip(grads, labels):
|
41 |
-
fig, ax = plt.subplots(1, 1)
|
42 |
ax.axis("off")
|
43 |
ax.imshow(img)
|
44 |
-
ax.set_title(label, fontsize=12)
|
45 |
grad = np.clip((label_grad - vmin) / (vmax - vmin), a_min=0.0, a_max=1.0)
|
46 |
colored_grad = cmap(grad)
|
47 |
grad = 1 - grad
|
48 |
colored_grad[..., -1] = grad * 0.7
|
|
|
|
|
|
|
|
|
49 |
ax.imshow(colored_grad)
|
50 |
plt.tight_layout(pad=0)
|
51 |
returns.append(plot_to_png(fig))
|
@@ -54,6 +77,10 @@ def generate_relevancy(
|
|
54 |
|
55 |
|
56 |
iface = gr.Interface(
|
|
|
|
|
|
|
|
|
57 |
fn=generate_relevancy,
|
58 |
inputs=[
|
59 |
gr.Image(type="numpy", label="Image"),
|
@@ -61,7 +88,7 @@ iface = gr.Interface(
|
|
61 |
gr.Textbox(label="Prompt"),
|
62 |
gr.Dropdown(
|
63 |
value="ours",
|
64 |
-
choices=["ours", "chefer_et_al"],
|
65 |
label="Relevancy Configuration",
|
66 |
),
|
67 |
gr.Checkbox(value=True, label="subtract mean"),
|
@@ -69,12 +96,33 @@ iface = gr.Interface(
|
|
69 |
outputs=gr.Gallery(label="Relevancy Maps", type="numpy"),
|
70 |
examples=[
|
71 |
[
|
72 |
-
"https://semantic-abstraction.cs.columbia.edu/downloads/
|
73 |
"basketball jersey,nintendo switch,television,ping pong table,vase,fireplace,abstract painting of a vespa,carpet,wall",
|
74 |
"a photograph of a {} in a home.",
|
75 |
"ours",
|
76 |
True,
|
77 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
],
|
79 |
)
|
80 |
# iface.launch(share=True)
|
|
|
4 |
from time import time
|
5 |
from matplotlib import pyplot as plt
|
6 |
import io
|
7 |
+
from PIL import Image, ImageDraw, ImageFont
|
8 |
|
9 |
|
10 |
def plot_to_png(fig):
|
|
|
15 |
return img
|
16 |
|
17 |
|
18 |
+
def add_text_to_image(
    image: np.ndarray,
    text,
    position,
    color="rgb(255, 255, 255)",
    fontsize=60,
):
    """Draw ``text`` on ``image`` with Pillow and return the result as an array.

    Args:
        image: Pixel array accepted by ``Image.fromarray``.
        text: String to render.
        position: Passed straight to ``ImageDraw.text`` as the anchor
            coordinates (presumably ``(x, y)`` in pixels -- the visible
            caller passes ``(0, 0)``).
        color: Fill colour forwarded to ``ImageDraw.text``.
        fontsize: Size requested for the Lato TrueType font.

    Returns:
        A NumPy array built from the annotated Pillow image.
    """
    font_path = "/usr/share/fonts/truetype/lato/Lato-Medium.ttf"

    canvas = Image.fromarray(image)
    draw = ImageDraw.Draw(canvas)
    try:
        font = ImageFont.truetype(font_path, fontsize)
    except OSError:
        # Lato is a system-level font and is not guaranteed to be installed
        # on the host. Fall back to Pillow's bundled font so the label is
        # still drawn instead of failing the whole request.
        # NOTE: the fallback font may not honour ``fontsize``.
        font = ImageFont.load_default()
    draw.text(
        position,
        text,
        fill=color,
        font=font,
    )
    return np.array(canvas)
|
36 |
+
|
37 |
+
|
38 |
def generate_relevancy(
|
39 |
img: np.array, labels: str, prompt: str, saliency_config: str, subtract_mean: bool
|
40 |
):
|
41 |
labels = labels.split(",")
|
42 |
prompts = [prompt]
|
43 |
+
img = np.asarray(Image.fromarray(img).resize((244 * 4, 244 * 4)))
|
44 |
assert img.dtype == np.uint8
|
45 |
h, w, c = img.shape
|
46 |
grads = ClipWrapper.get_clip_saliency(
|
|
|
58 |
|
59 |
returns = []
|
60 |
for label_grad, label in zip(grads, labels):
|
61 |
+
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
|
62 |
ax.axis("off")
|
63 |
ax.imshow(img)
|
|
|
64 |
grad = np.clip((label_grad - vmin) / (vmax - vmin), a_min=0.0, a_max=1.0)
|
65 |
colored_grad = cmap(grad)
|
66 |
grad = 1 - grad
|
67 |
colored_grad[..., -1] = grad * 0.7
|
68 |
+
colored_grad = add_text_to_image(
|
69 |
+
(colored_grad * 255).astype(np.uint8), text=label, position=(0, 0)
|
70 |
+
)
|
71 |
+
colored_grad = colored_grad.astype(float) / 255
|
72 |
ax.imshow(colored_grad)
|
73 |
plt.tight_layout(pad=0)
|
74 |
returns.append(plot_to_png(fig))
|
|
|
77 |
|
78 |
|
79 |
iface = gr.Interface(
|
80 |
+
title="Semantic Abstraction Multi-scale Relevancy Extractor",
|
81 |
+
description="""A CPU-only demo of [Semantic Abstraction](https://semantic-abstraction.cs.columbia.edu/)'s Multi-Scale Relevancy Extractor. To run GPU inference locally, use the [official codebase release](https://github.com/columbia-ai-robotics/semantic-abstraction).
|
82 |
+
|
83 |
+
This relevancy extractor builds heavily on [Chefer et al.'s codebase](https://github.com/hila-chefer/Transformer-MM-Explainability) and [CLIP on Wheels' codebase](https://cow.cs.columbia.edu/).""",
|
84 |
fn=generate_relevancy,
|
85 |
inputs=[
|
86 |
gr.Image(type="numpy", label="Image"),
|
|
|
88 |
gr.Textbox(label="Prompt"),
|
89 |
gr.Dropdown(
|
90 |
value="ours",
|
91 |
+
choices=["ours", "ours_fast", "chefer_et_al"],
|
92 |
label="Relevancy Configuration",
|
93 |
),
|
94 |
gr.Checkbox(value=True, label="subtract mean"),
|
|
|
96 |
outputs=gr.Gallery(label="Relevancy Maps", type="numpy"),
|
97 |
examples=[
|
98 |
[
|
99 |
+
"https://semantic-abstraction.cs.columbia.edu/downloads/gameroom.png",
|
100 |
"basketball jersey,nintendo switch,television,ping pong table,vase,fireplace,abstract painting of a vespa,carpet,wall",
|
101 |
"a photograph of a {} in a home.",
|
102 |
"ours",
|
103 |
True,
|
104 |
+
],
|
105 |
+
[
|
106 |
+
"https://semantic-abstraction.cs.columbia.edu/downloads/livingroom.png",
|
107 |
+
"monopoly boardgame set,door knob,sofa,coffee table,plant,carpet,wall",
|
108 |
+
"a photograph of a {} in a home.",
|
109 |
+
"ours",
|
110 |
+
True,
|
111 |
+
],
|
112 |
+
[
|
113 |
+
"https://semantic-abstraction.cs.columbia.edu/downloads/fireplace.png",
|
114 |
+
"fireplace,beige armchair,candle,large indoor plant in a pot,forest painting,cheetah-patterned pillow,floor,carpet,wall",
|
115 |
+
"a photograph of a {} in a home.",
|
116 |
+
"ours",
|
117 |
+
True,
|
118 |
+
],
|
119 |
+
[
|
120 |
+
"https://semantic-abstraction.cs.columbia.edu/downloads/walle.png",
|
121 |
+
"WALL-E,a fire extinguisher",
|
122 |
+
"a 3D render of {}.",
|
123 |
+
"ours",
|
124 |
+
True,
|
125 |
+
],
|
126 |
],
|
127 |
)
|
128 |
# iface.launch(share=True)
|