khanrc committed · Commit e0ca513 · 0 parent(s)

initial commit
.gitattributes ADDED
@@ -0,0 +1,34 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ *.pth
+ __pycache__
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "tcl"]
+ path = tcl
+ url = https://github.com/kakaobrain/tcl.git
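Since the TCL code lives in this submodule (app.py only inserts ./tcl/ into sys.path), a fresh clone needs the submodule fetched before the app can import from it. A hypothetical setup step, written in the same os.system style app.py itself uses:

    import os
    # fetch the pinned tcl submodule after a plain "git clone" of this Space
    os.system("git submodule update --init")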
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: TCL
+ emoji: 💩
+ colorFrom: yellow
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.23.0
+ app_file: app.py
+ app_port: 9718
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,183 @@
+ import os
+ import sys
+ from importlib.util import find_spec
+
+ print("Prepare demo ...")
+ if not os.path.exists("tcl.pth"):
+     print("Download TCL checkpoint ...")
+     os.system("wget -q https://github.com/kakaobrain/tcl/releases/download/v1.0.0/tcl.pth")
+
+ if not (find_spec("mmcv") and find_spec("mmseg")):
+     print("Install mmcv & mmseg ...")
+     os.system("mim install mmcv-full==1.6.2 mmsegmentation==0.27.0")
+
+ if not find_spec("detectron2"):
+     print("Install detectron2 ...")
+     os.system("pip install git+https://github.com/facebookresearch/detectron2.git")
+
+ sys.path.insert(0, "./tcl/")
+
+ print(" -- done.")
+
+ import json
+ from contextlib import ExitStack
+ import gradio as gr
+ import torch
+ from torch.cuda.amp import autocast
+
+ from detectron2.evaluation import inference_context
+
+ from predictor import build_demo_model
+
+
+ model = build_demo_model()
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+ else:
+     device = torch.device("cpu")
+
+ print(f"device: {device}")
+ model.to(device)
+
+
+ title = "TCL: Text-grounded Contrastive Learning"
+ description_head = """
+ <p style='text-align: center'> <a href='https://arxiv.org/abs/2212.00785' target='_blank'>Paper</a> | <a href='https://github.com/kakaobrain/tcl' target='_blank'>Code</a> </p>
+ """
+
+ description_body = """
+ Gradio demo for "Learning to Generate Text-grounded Mask for Open-world Semantic Segmentation from Only Image-Text Pairs".
+
+ Explore TCL's capability to perform open-world semantic segmentation **without any mask annotations**. Choose from the provided examples or upload your own image. Use the query format `bg; class1; class2; ...`, with `;` as the separator; the `bg` background query is optional (as in the third example).
+
+ This demo highlights both the strengths and the limitations of unsupervised open-world segmentation methods. Although TCL can handle arbitrary concepts, accurately capturing object boundaries without mask annotations remains a challenge.
+ """
+
+ if device.type == "cpu":
+     description_body += "\nInference takes about 10 seconds since this demo runs on a free CPU device."
+
+ description = description_head + description_body
+
+ article = """
+ <p style='text-align: center'><a href='https://arxiv.org/abs/2212.00785' target='_blank'>Learning to Generate Text-grounded Mask for Open-world Semantic Segmentation from Only Image-Text Pairs</a> | <a href='https://github.com/kakaobrain/tcl' target='_blank'>Github Repo</a></p>
+ """
+
+ voc_examples = [
+     ["examples/voc_59.jpg", "bg; cat; dog"],
+     ["examples/voc_97.jpg", "bg; car"],
+     ["examples/voc_266.jpg", "bg; dog"],
+     ["examples/voc_294.jpg", "bg; bird"],
+     ["examples/voc_864.jpg", "bg; cat"],
+     ["examples/voc_1029.jpg", "bg; bus"],
+ ]
+
+ examples = [
+     [
+         "examples/dogs.jpg",
+         "bg; corgi; shepherd",
+     ],
+     [
+         "examples/dogs.jpg",
+         "bg; dog",
+     ],
+     [
+         "examples/dogs.jpg",
+         "corgi; shepherd; lawn, trees, and fallen leaves",
+     ],
+     [
+         "examples/banana.jpg",
+         "bg; banana",
+     ],
+     [
+         "examples/banana.jpg",
+         "bg; red banana; green banana; yellow banana",
+     ],
+     [
+         "examples/frodo_sam_gollum.jpg",
+         "bg; frodo; gollum; samwise",
+     ],
+     [
+         "examples/frodo_sam_gollum.jpg",
+         "bg; rocks; monster; boys with cape",
+     ],
+     [
+         "examples/mb_mj.jpg",
+         "bg; marlon brando; michael jackson",
+     ],
+ ]
+
+ examples = examples + voc_examples
+
+
+ def inference(img, query):
+     # "bg; class1; class2" -> ["bg", "class1", "class2"]
+     query = query.split(";")
+     query = [v.strip() for v in query]
+
+     with ExitStack() as stack:
+         stack.enter_context(inference_context(model))
+         stack.enter_context(torch.no_grad())
+
+         with autocast():
+             visualized_output = model.forward_vis(img, query)
+
+     return visualized_output
+
+
+ theme = gr.themes.Soft(text_size=gr.themes.sizes.text_md, primary_hue="teal")
+ with gr.Blocks(title=title, theme=theme) as demo:
+     gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
+     gr.Markdown(description)
+     input_components = []
+     output_components = []
+
+     with gr.Row():
+         with gr.Column(scale=4, variant="panel"):
+             output_image_gr = gr.outputs.Image(label="Segmentation", type="pil").style(height=300)
+             output_components.append(output_image_gr)
+
+             with gr.Row():
+                 input_gr = gr.inputs.Image(type="pil")
+                 query_gr = gr.inputs.Textbox(default="", label="Query")
+                 input_components.extend([input_gr, query_gr])
+
+             with gr.Row():
+                 clear_btn = gr.Button("Clear")
+                 submit_btn = gr.Button("Submit", variant="primary")
+
+     inputs = [c for c in input_components if not isinstance(c, gr.State)]
+     outputs = [c for c in output_components if not isinstance(c, gr.State)]
+     with gr.Column(scale=2):
+         examples_handler = gr.Examples(
+             examples=examples,
+             inputs=inputs,
+             outputs=outputs,
+             fn=inference,
+             cache_examples=True,
+             examples_per_page=7,
+         )
+
+     gr.Markdown(article)
+
+     submit_btn.click(
+         inference,
+         input_components,
+         output_components,
+         scroll_to_output=True,
+     )
+
+     # Reset all components client-side; cleared_value is used where a component defines it.
+     clear_btn.click(
+         None,
+         [],
+         (input_components + output_components),
+         _js=f"""() => {json.dumps(
+             [component.cleared_value if hasattr(component, "cleared_value") else None
+              for component in input_components + output_components] + (
+                 [gr.Column.update(visible=True)]
+             )
+             + ([gr.Column.update(visible=False)])
+         )}
+         """,
+     )
+
+ demo.launch()
+ # demo.launch(server_name="0.0.0.0", server_port=9718)
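For reference, the whole pipeline boils down to a few calls. A minimal sketch of driving the demo without the Gradio UI, assuming tcl.pth is downloaded and the tcl submodule is on sys.path as app.py arranges (the image path is one of the bundled examples):

    import torch
    from PIL import Image
    from predictor import build_demo_model

    model = build_demo_model()                    # loads ./tcl.pth by default
    img = Image.open("examples/dogs.jpg")
    queries = [q.strip() for q in "bg; corgi; shepherd".split(";")]
    with torch.no_grad():
        result = model.forward_vis(img, queries)  # PIL.Image with masks drawn
    result.save("segmentation.png")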
examples/banana.jpg ADDED
examples/dogs.jpg ADDED
examples/frodo_sam_gollum.jpg ADDED
examples/mb_mj.jpg ADDED
examples/voc_1029.jpg ADDED
examples/voc_1136.jpg ADDED
examples/voc_1296.jpg ADDED
examples/voc_266.jpg ADDED
examples/voc_294.jpg ADDED
examples/voc_296.jpg ADDED
examples/voc_567.jpg ADDED
examples/voc_59.jpg ADDED
examples/voc_84.jpg ADDED
examples/voc_864.jpg ADDED
examples/voc_97.jpg ADDED
packages.txt ADDED
@@ -0,0 +1 @@
+ wget
predictor.py ADDED
@@ -0,0 +1,159 @@
+ import torch
+ import torch.nn as nn
+ from torchvision import transforms as T
+ from omegaconf import OmegaConf
+ from typing import List
+ from mmseg import datasets as mmseg_datasets
+
+ from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
+
+ import numpy as np
+ from PIL import Image
+ from detectron2.data import MetadataCatalog
+ from detectron2.utils.visualizer import Visualizer
+
+ # TCL
+ from models import build_model
+ from models.tcl.pamr import PAMR
+ from datasets.builder import build_text_transform
+ from segmentation.evaluation.builder import build_dataset_class_tokens
+
+ # Concatenate the VOC and COCO-Stuff palettes, then tile so long query lists still get colors.
+ PALETTE = mmseg_datasets.PascalVOCDataset.PALETTE + mmseg_datasets.COCOStuffDataset.PALETTE
+ PALETTE *= 5
+
+
+ def build_demo_model(ckpt_path="./tcl.pth", size=224):
+     # Load TCL model (map_location="cpu" so loading also works on CPU-only hosts;
+     # the caller moves the model to the target device afterwards)
+     print(f"Load {ckpt_path} ...")
+     ckpt = torch.load(ckpt_path, map_location="cpu")
+     cfg = OmegaConf.load("./tcl/configs/tcl.yml")
+     model = build_model(cfg.model)
+
+     # The (minimal) checkpoint contains only the learned parameters;
+     # the frozen CLIP parameters are not included, hence strict=False.
+     model.load_state_dict(ckpt["model"], strict=False)
+     model.eval()
+
+     # build TCLDemo
+     demo = TCLDemo(model, size)
+
+     return demo
+
+
+ def _convert_image_to_rgb(image):
+     return image.convert("RGB")
+
+
+ def _transform(n_px):
+     return T.Compose([
+         T.Resize(n_px, interpolation=T.InterpolationMode.BICUBIC),
+         _convert_image_to_rgb,
+         T.ToTensor(),
+         T.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
+     ])
+
+
+ class TCLDemo(nn.Module):
+     """
+     Args:
+         model: TCL model
+         size: resize shorter side of image to `size`
+     """
+     def __init__(self, model, size=224):
+         super().__init__()
+         self.model = model
+         self.size = size
+
+         self.preprocess = _transform(size)
+         self.tokenizer = build_text_transform()
+         self.pamr = PAMR(10, [1, 2, 4, 8, 12, 24]).eval()
+
+     @property
+     def device(self):
+         return next(self.model.parameters()).device
+
+     def build_text_embedding(self, texts: List[str]):
+         text_tokens = build_dataset_class_tokens(self.tokenizer, "custom", texts)
+         text_embeddings = self.model.build_text_embedding(text_tokens)
+         return text_embeddings
+
+     def forward(self, image, texts: List[str], apply_pamr=True):
+         """
+         Args:
+             image: PIL.Image
+             texts: List[str]
+         """
+         with_bg = False
+         if texts[0] in ["bg", "background"]:
+             with_bg = True
+             texts = texts[1:]
+
+         # preprocess
+         image = self.preprocess(image).unsqueeze(0).to(self.device)
+         text_embs = self.build_text_embedding(texts)
+
+         # forward
+         mask, simmap = self.model.generate_masks(
+             image,
+             text_embs,
+         )
+
+         # refinement (pixel-adaptive mask refinement)
+         if apply_pamr:
+             mask = self.pamr(image, mask)
+
+         # optional background: prepend a constant-threshold channel so that
+         # argmax assigns low-confidence pixels to background
+         I, T, H, W = mask.shape
+         if with_bg:
+             bg_thresh = 0.4 if apply_pamr else 0.5
+             bg = torch.full(
+                 [I, 1, H, W],
+                 bg_thresh,
+                 dtype=torch.float,
+                 device=mask.device,
+             )
+             mask = torch.cat([bg, mask], dim=1)
+
+         return mask
+
+     def visualize(self, image, texts, mask):
+         """
+         Args:
+             image (PIL.Image)
+             texts (List[str])
+             mask (Tensor)
+         """
+         with_bg = texts[0] in ["bg", "background"]
+
+         N = len(texts)
+         if with_bg:
+             palette = PALETTE
+         else:
+             palette = PALETTE[1:]
+
+         MetadataCatalog.pop("__unused", None)
+         md = MetadataCatalog.get("__unused")
+         md.set(
+             thing_classes=texts,
+             thing_colors=palette,
+             stuff_classes=texts,
+             stuff_colors=palette,
+         )
+
+         seg_res = mask.squeeze(0).argmax(0).cpu()
+         if with_bg:
+             # remap background (index 0) to an out-of-range id so it is left undrawn
+             seg_res[seg_res == 0] = N + 10
+
+         image = image.resize(mask.shape[2:][::-1])  # PIL expects (W, H)
+         image = np.asarray(image)
+
+         visualizer = Visualizer(image, md)
+         r = visualizer.draw_sem_seg(seg_res)
+
+         res = Image.fromarray(r.get_image())
+
+         return res
+
+     def forward_vis(self, image, texts, apply_pamr=True):
+         mask = self(image, texts, apply_pamr=apply_pamr)
+         res = self.visualize(image, texts, mask)
+
+         return res
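The constant background channel in forward is worth a note: because the channel holds bg_thresh everywhere, the argmax over the class dimension sends any pixel whose best class score falls below the threshold to background. A toy illustration with made-up scores (in the demo, the scores come from model.generate_masks, optionally refined by PAMR):

    import torch

    mask = torch.tensor([[[[0.9, 0.3],
                           [0.2, 0.7]]]])          # shape (I=1, T=1, H=2, W=2)
    bg = torch.full((1, 1, 2, 2), 0.5)             # bg_thresh = 0.5 (the no-PAMR case)
    seg = torch.cat([bg, mask], dim=1).argmax(dim=1)
    print(seg)  # tensor([[[1, 0], [0, 1]]]) -- 0 = background, 1 = the query class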
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ torch==1.12.1
+ torchvision==0.13.1
+
+ webdataset==0.1.103
+ timm==0.6.7
+ einops==0.4.1
+ tqdm==4.62.3
+ wandb==0.12.18
+ regex==2022.6.2
+ braceexpand==0.1.7
+ ftfy==6.1.1
+ numpy==1.21.2
+ omegaconf==2.2.2
+ Pillow==9.3.0
+ termcolor==1.1.0
+ openmim
tcl ADDED
@@ -0,0 +1 @@
+ Subproject commit e8e84cac4f31c3718356137208c9269477aa1ef8