qubvel-hf HF Staff committed on
Commit
9b7fcdb
·
0 Parent(s):

Clean proj with LFS

.gitattributes ADDED
@@ -0,0 +1,5 @@
+ *.png filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.webp filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.gif filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,161 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+ /gradio*
.vscode/settings.json ADDED
@@ -0,0 +1,3 @@
+ {
+     "python.pythonPath": "/conda/install/envs/policygrad/bin/python"
+ }
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,171 @@
1
+ ## XFeat: Accelerated Features for Lightweight Image Matching
2
+ [Guilherme Potje](https://guipotje.github.io/) · [Felipe Cadar](https://eucadar.com/) · [Andre Araujo](https://andrefaraujo.github.io/) · [Renato Martins](https://renatojmsdh.github.io/) · [Erickson R. Nascimento](https://homepages.dcc.ufmg.br/~erickson/)
3
+
4
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE)
5
+ [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/verlab/accelerated_features/blob/main/notebooks/xfeat_matching.ipynb)
6
+
7
+ ### [[ArXiv]](https://arxiv.org/abs/2404.19174) | [[Project Page]](https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/) | [[CVPR'24 Paper]](https://cvpr.thecvf.com/)
8
+
9
+ <div align="center" style="display: flex; justify-content: center; align-items: center; flex-direction: column;">
10
+ <div style="display: flex; justify-content: space-around; width: 100%;">
11
+ <img src='./figs/xfeat.gif' width="400"/>
12
+ <img src='./figs/sift.gif' width="400"/>
13
+ </div>
14
+
15
+ Real-time XFeat demonstration (left) compared to SIFT (right) on a textureless scene. SIFT cannot handle fast camera movements, whereas XFeat provides robust matches under these adverse conditions while also being faster than SIFT on CPU.
16
+
17
+ </div>
18
+
19
+ **TL;DR**: Really fast learned keypoint detector and descriptor. Supports sparse and semi-dense matching.
20
+
21
+ Just wanna quickly try it on your own images? Check this out: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/verlab/accelerated_features/blob/main/notebooks/xfeat_torch_hub.ipynb)
22
+
23
+ ## Table of Contents
24
+ - [Introduction](#introduction) <img align="right" src='./figs/xfeat_quali.jpg' width=360 />
25
+ - [Installation](#installation)
26
+ - [Usage](#usage)
27
+ - [Inference](#inference)
28
+ - [Training](#training)
29
+ - [Evaluation](#evaluation)
30
+ - [Real-time demo app](#real-time-demo)
31
+ - [Contribute](#contributing)
32
+ - [Citation](#citation)
33
+ - [License](#license)
34
+ - [Acknowledgements](#acknowledgements)
35
+
36
+ ## Introduction
37
+ This repository contains the official implementation of the paper: *[XFeat: Accelerated Features for Lightweight Image Matching](https://arxiv.org/abs/2404.19174)*, to be presented at CVPR 2024.
38
+
39
+ **Motivation.** Why another keypoint detector and descriptor among dozens of existing ones? We noticed that the current trend in the literature focuses on accuracy but often neglects compute efficiency, especially when these solutions are deployed in the real world. For applications in mobile robotics and augmented reality, it is critical that models can run on hardware-constrained computers. To this end, XFeat was designed as a hardware-agnostic solution that targets both accuracy and efficiency in an image matching pipeline.
40
+
41
+ **Capabilities.**
42
+ - Real-time sparse inference on CPU for VGA images (tested on a laptop with an i5 CPU and vanilla PyTorch);
+ - Simple architecture components, which facilitate deployment on embedded devices (Jetson, Raspberry Pi, custom AI chips, etc.);
+ - Supports both sparse and semi-dense matching of local features;
+ - Compact descriptors (64D);
+ - Performance comparable to known deep local features such as SuperPoint, while being significantly faster and more lightweight. XFeat also exhibits much better robustness to viewpoint and illumination changes than classic local features such as ORB and SIFT;
+ - Supports batched inference if you want ridiculously fast feature extraction. In the VGA sparse setting, we achieved about 1,400 FPS using an RTX 4090 (see the sketch after this list);
+ - For single-batch inference on GPU (VGA), one can easily achieve over 150 FPS while leaving plenty of room on the GPU for other concurrent tasks.
49
+
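+ For reference, here is a minimal sketch of the batched, sparse, and semi-dense entry points, mirroring the bundled `minimal_example.py` (random tensors stand in for real images):
+ ```python
+ import torch
+ from modules.xfeat import XFeat
+ 
+ xfeat = XFeat()
+ 
+ # Batched sparse extraction: returns one dict per image in the batch
+ batch = torch.randn(4, 3, 480, 640)
+ outputs = xfeat.detectAndCompute(batch, top_k=4096)
+ print([len(o['keypoints']) for o in outputs])
+ 
+ # Sparse matching (MNN) vs. semi-dense matching with refinement, single pair
+ im1, im2 = torch.randn(1, 3, 480, 640), torch.randn(1, 3, 480, 640)
+ mkpts_0, mkpts_1 = xfeat.match_xfeat(im1, im2)       # sparse keypoints + descriptors
+ mkpts_0, mkpts_1 = xfeat.match_xfeat_star(im1, im2)  # semi-dense, coarse-to-fine
+ ```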
50
+ ##
51
+
52
+ **Paper Abstract.** We introduce a lightweight and accurate architecture for resource-efficient visual correspondence. Our method, dubbed XFeat (Accelerated Features), revisits fundamental design choices in convolutional neural networks for detecting, extracting, and matching local features. Our new model satisfies a critical need for fast and robust algorithms suitable to resource-limited devices. In particular, accurate image matching requires sufficiently large image resolutions -- for this reason, we keep the resolution as large as possible while limiting the number of channels in the network. Besides, our model is designed to offer the choice of matching at the sparse or semi-dense levels, each of which may be more suitable for different downstream applications, such as visual navigation and augmented reality. Our model is the first to offer semi-dense matching efficiently, leveraging a novel match refinement module that relies on coarse local descriptors. XFeat is versatile and hardware-independent, surpassing current deep learning-based local features in speed (up to 5x faster) with comparable or better accuracy, proven in pose estimation and visual localization. We showcase it running in real-time on an inexpensive laptop CPU without specialized hardware optimizations.
53
+
54
+ **Overview of XFeat's architecture.**
55
+ XFeat extracts a keypoint heatmap $\mathbf{K}$, a compact 64-D dense descriptor map $\mathbf{F}$, and a reliability heatmap $\mathbf{R}$. It achieves unparalleled speed via early downsampling and shallow convolutions, followed by deeper convolutions in later encoders for robustness. Contrary to typical methods, it separates keypoint detection into a distinct branch, using $1 \times 1$ convolutions on an $8 \times 8$ tensor-block-transformed image for fast processing; it is one of the few current learned methods that decouples detection and description so that each can be processed independently (see the sketch below the architecture diagram).
56
+
57
+ <img align="center" src="./figs/xfeat_arq.png" width=1000 />
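+ For illustration, here is a minimal sketch of this 8×8 block transform, mirroring `_unfold2d` in `modules/model.py`: each 8×8 pixel block is folded into 64 channels so that the keypoint head can classify blocks with cheap $1 \times 1$ convolutions.
+ ```python
+ import torch
+ 
+ def block_transform(x, ws=8):
+     # Space-to-depth: (B, C, H, W) -> (B, C*ws*ws, H/ws, W/ws), as in _unfold2d
+     B, C, H, W = x.shape
+     x = x.unfold(2, ws, ws).unfold(3, ws, ws).reshape(B, C, H // ws, W // ws, ws * ws)
+     return x.permute(0, 1, 4, 2, 3).reshape(B, -1, H // ws, W // ws)
+ 
+ img = torch.randn(1, 1, 480, 640)   # grayscale VGA image
+ print(block_transform(img).shape)   # torch.Size([1, 64, 60, 80])
+ ```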
58
+
59
+ ## Installation
60
+ XFeat has minimal dependencies, relying only on PyTorch. XFeat also does not need a GPU for real-time sparse inference (vanilla PyTorch without any special optimization), unless you run it on high-resolution images. If you want to run the real-time matching demo, you will also need OpenCV.
61
+ We recommend using conda, but you can use any virtualenv of your choice.
62
+ If you use conda, just create a new env with:
63
+ ```bash
64
+ git clone https://github.com/verlab/accelerated_features.git
65
+ cd accelerated_features
66
+
67
+ #Create conda env
68
+ conda create -n xfeat python=3.8
69
+ conda activate xfeat
70
+ ```
71
+
72
+ Then, install [pytorch (>=1.10)](https://pytorch.org/get-started/previous-versions/), followed by the remaining dependencies if you want to run the demos:
73
+ ```bash
74
+
75
+ #CPU only; for GPU, check the PyTorch website for the version best suited to your GPU.
76
+ pip install torch==1.10.1+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
77
+ # CPU only for MacOS
78
+ # pip install torch==1.10.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html
79
+
80
+ #Install dependencies for the demo
81
+ pip install opencv-contrib-python tqdm
82
+ ```
83
+
84
+ ## Usage
85
+
86
+ For your convenience, we provide ready-to-use notebooks for some examples.
87
+
88
+ | **Description** | **Notebook** |
89
+ |--------------------------------|-------------------------------|
90
+ | Minimal example | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/verlab/accelerated_features/blob/main/notebooks/minimal_example.ipynb) |
91
+ | Matching & registration example | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/verlab/accelerated_features/blob/main/notebooks/xfeat_matching.ipynb) |
92
+ | Torch hub example | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/verlab/accelerated_features/blob/main/notebooks/xfeat_torch_hub.ipynb) |
93
+
94
+
95
+ ### Inference
96
+ To run XFeat on an image, a few lines of code are enough:
97
+ ```python
98
+ import torch
+ from modules.xfeat import XFeat
99
+
100
+ xfeat = XFeat()
101
+
102
+ #Simple inference with batch sz = 1
103
+ output = xfeat.detectAndCompute(torch.randn(1,3,480,640), top_k = 4096)[0]
104
+ ```
105
+ Or you can use this [script](./minimal_example.py) in the root folder:
106
+ ```bash
107
+ python3 minimal_example.py
108
+ ```
109
+
110
+ If you already have PyTorch installed, you can simply load XFeat via torch hub:
111
+ ```python
112
+ import torch
113
+
114
+ xfeat = torch.hub.load('verlab/accelerated_features', 'XFeat', pretrained = True, top_k = 4096)
115
+
116
+ #Simple inference with batch sz = 1
117
+ output = xfeat.detectAndCompute(torch.randn(1,3,480,640), top_k = 4096)[0]
118
+ ```
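+ In both cases, `detectAndCompute` returns one dictionary per batch item; the expected tensor shapes (see `minimal_example.py`) are:
+ ```python
+ out = xfeat.detectAndCompute(torch.randn(1, 3, 480, 640), top_k=4096)[0]
+ print(out['keypoints'].shape)    # (N, 2)  keypoint (x, y) coordinates, N <= top_k
+ print(out['scores'].shape)       # (N,)    keypoint reliability scores
+ print(out['descriptors'].shape)  # (N, 64) L2-normalized local descriptors
+ ```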
119
+
120
+ ### Training
121
+ XFeat training code will be released soon. Please stay tuned.
122
+
123
+ ### Evaluation
124
+ XFeat evaluation code will be released soon, alongside the training scripts. Please stay tuned.
125
+
126
+ ## Real-time Demo
127
+ To demonstrate the capabilities of XFeat, we provide a real-time matching demo with homography registration. Currently, you can experiment with XFeat, ORB, and SIFT. You will need a working webcam. To run the demo and list the available input flags, please run:
128
+ ```bash
129
+ python3 realtime_demo.py -h
130
+ ```
131
+
132
+ Don't forget to press 's' to set the desired reference image. Note that the demo only works correctly for planar scenes or rotation-only motion, because we're using a homography model.
133
+
134
+ If you want to run the demo with XFeat, please run:
135
+ ```bash
136
+ python3 realtime_demo.py --method XFeat
137
+ ```
138
+
139
+ Or test with SIFT or ORB:
140
+ ```bash
141
+ python3 realtime_demo.py --method SIFT
142
+ python3 realtime_demo.py --method ORB
143
+ ```
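+ The bundled Gradio app (`app.py`) filters raw XFeat matches with a robust homography fit, and the real-time demo relies on the same homography model for registration. A minimal sketch of that filtering step, using OpenCV's MAGSAC estimator as in `app.py` (assuming `img1` and `img2` are two BGR images loaded with OpenCV):
+ ```python
+ import cv2
+ 
+ mkpts_0, mkpts_1 = xfeat.match_xfeat(img1, img2)  # (N, 2) match coordinates
+ # Keep only matches consistent with a single homography
+ H, mask = cv2.findHomography(mkpts_0, mkpts_1, cv2.USAC_MAGSAC, 3.5,
+                              maxIters=1_000, confidence=0.999)
+ inliers = mask.flatten().astype(bool)
+ mkpts_0, mkpts_1 = mkpts_0[inliers], mkpts_1[inliers]
+ print(f"{inliers.sum()} inlier matches survive the homography check")
+ ```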
144
+
145
+ ## Contributing
146
+ Contributions to XFeat are welcome!
147
+ Currently, it would be nice to have an export script for efficient deployment engines such as TensorRT and ONNX. It would also be cool to train a lightweight learned matcher on top of XFeat local features.
148
+
149
+ ## Citation
150
+ If you find this code useful for your research, please cite the paper:
151
+
152
+ ```bibtex
153
+ @INPROCEEDINGS{potje2024cvpr,
154
+ author={Guilherme {Potje} and Felipe {Cadar} and Andre {Araujo} and Renato {Martins} and Erickson R. {Nascimento}},
155
+ booktitle={2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
156
+ title={XFeat: Accelerated Features for Lightweight Image Matching},
157
+ year={2024}}
158
+ ```
159
+
160
+ ## License
161
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE)
162
+
163
+ ## Acknowledgements
164
+ - We thank the agencies CAPES, CNPq, and Google for funding different parts of this work.
165
+ - We thank the developers of Kornia for the [kornia library](https://github.com/kornia/kornia)!
166
+
167
+ **VeRLab:** Laboratory of Computer Vision and Robotics https://www.verlab.dcc.ufmg.br
168
+ <br>
169
+ <img align="left" width="auto" height="50" src="./figs/ufmg.png">
170
+ <img align="right" width="auto" height="50" src="./figs/verlab.png">
171
+ <br/>
app.py ADDED
@@ -0,0 +1,94 @@
+ import cv2
+ import numpy as np
+ import gradio as gr
+ 
+ from modules.xfeat import XFeat
+ from utils import visualize_matches
+ 
+ 
+ HEADER = """
+ <div align="center">
+     <p>
+         <span style="font-size: 30px; vertical-align: bottom;"> XFeat: Accelerated Features for Lightweight Image Matching</span>
+     </p>
+     <p style="margin-top: -15px;">
+         <a href="https://arxiv.org/abs/2404.19174" target="_blank" style="color: grey;">ArXiv Paper</a>
+         &nbsp;
+         <a href="https://github.com/verlab/accelerated_features" target="_blank" style="color: grey;">GitHub Repository</a>
+     </p>
+     <p>
+         Upload two images 🖼️ of the object and identify matches between them 🚀
+     </p>
+ </div>
+ """
+ 
+ ABSTRACT = """
+ We introduce a lightweight and accurate architecture for resource-efficient visual correspondence. Our method, dubbed XFeat (Accelerated Features), revisits fundamental design choices in convolutional neural networks for detecting, extracting, and matching local features. Our new model satisfies a critical need for fast and robust algorithms suitable to resource-limited devices. In particular, accurate image matching requires sufficiently large image resolutions -- for this reason, we keep the resolution as large as possible while limiting the number of channels in the network. Besides, our model is designed to offer the choice of matching at the sparse or semi-dense levels, each of which may be more suitable for different downstream applications, such as visual navigation and augmented reality. Our model is the first to offer semi-dense matching efficiently, leveraging a novel match refinement module that relies on coarse local descriptors. XFeat is versatile and hardware-independent, surpassing current deep learning-based local features in speed (up to 5x faster) with comparable or better accuracy, proven in pose estimation and visual localization. We showcase it running in real-time on an inexpensive laptop CPU without specialized hardware optimizations.
+ """
+ 
+ def find_matches(image_0, image_1):
+ 
+     image_0_bgr = cv2.cvtColor(image_0, cv2.COLOR_RGB2BGR)
+     image_1_bgr = cv2.cvtColor(image_1, cv2.COLOR_RGB2BGR)
+ 
+     xfeat = XFeat(weights="weights/xfeat.pt", top_k=4096)
+ 
+     #Use out-of-the-box function for extraction + MNN matching
+     match_kp0, match_kp1 = xfeat.match_xfeat(image_0_bgr, image_1_bgr, top_k = 4096)
+ 
+     # canvas = warp_corners_and_draw_matches(mkpts_0, mkpts_1, image_0, image_1)
+ 
+     _, mask = cv2.findHomography(match_kp0, match_kp1, cv2.USAC_MAGSAC, 3.5, maxIters=1_000, confidence=0.999)
+     keep = mask.flatten().astype(bool)
+ 
+     match_kp0 = match_kp0[keep]
+     match_kp1 = match_kp1[keep]
+ 
+     num_filtered_matches = len(match_kp0)
+ 
+     viz = visualize_matches(
+         image_0,
+         image_1,
+         match_kp0,
+         match_kp1,
+         np.eye(num_filtered_matches),
+         show_keypoints=True,
+         highlight_unmatched=True,
+         title=f"{num_filtered_matches} matches",
+         line_width=2,
+     )
+ 
+     return viz
+ 
+ 
+ with gr.Blocks() as demo:
+ 
+     gr.Markdown(HEADER)
+     with gr.Accordion("Abstract (click to open)", open=False):
+         gr.Image("assets/xfeat_arq.png")
+         gr.Markdown(ABSTRACT)
+ 
+     with gr.Row():
+         image_1 = gr.Image()
+         image_2 = gr.Image()
+     with gr.Row():
+         button = gr.Button(value="Find Matches")
+         clear = gr.ClearButton(value="Clear")
+     output = gr.Image()
+     button.click(find_matches, [image_1, image_2], output)
+     clear.add([image_1, image_2, output])
+ 
+     gr.Examples(
+         examples=[
+             ["assets/ref.png", "assets/tgt.png"],
+             ["assets/demo1.jpg", "assets/demo2.jpg"],
+             ["assets/tower-1.webp", "assets/tower-2.jpeg"],
+         ],
+         inputs=[image_1, image_2],
+         outputs=[output],
+         fn=find_matches,
+         cache_examples=None,
+     )
+ 
+ if __name__ == "__main__":
+     demo.launch()
assets/demo1.jpg ADDED

Git LFS Details

  • SHA256: 0c3719183ae9139e45569e16861f42ac8e47b46c86f3536fdc52b22011f31871
  • Pointer size: 130 Bytes
  • Size of remote file: 85.3 kB
assets/demo2.jpg ADDED

Git LFS Details

  • SHA256: 24dbe3a2ee909002b265e647b96a7141419c954a2a90b235699c186f927705c4
  • Pointer size: 131 Bytes
  • Size of remote file: 114 kB
assets/ref.png ADDED

Git LFS Details

  • SHA256: 1292e2ba509b338a05820e7bf62dcda0b26688a4a4307996ee7c295e6627bee2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.09 MB
assets/tgt.png ADDED

Git LFS Details

  • SHA256: ffd0fe993cd7967f2e0e44495c9f60828f8e2d6a61440ede4a951ee67b865613
  • Pointer size: 131 Bytes
  • Size of remote file: 946 kB
assets/tower-1.webp ADDED

Git LFS Details

  • SHA256: ee7721d6a79481e2b255826a2f294a6d01f1a8f9b58fbe1253c4ae5028cf69c2
  • Pointer size: 131 Bytes
  • Size of remote file: 256 kB
assets/tower-2.jpeg ADDED
assets/xfeat_arq.png ADDED

Git LFS Details

  • SHA256: 071bf66baf111568ce8a2b27879f1f0b4b0e27552845db35ecd44c9d202cf5ab
  • Pointer size: 131 Bytes
  • Size of remote file: 191 kB
figs/sift.gif ADDED

Git LFS Details

  • SHA256: 8059f5d11b8cfc01fa96894a7a2aba1c4c5079d808531a4b39d74538e2f5f312
  • Pointer size: 132 Bytes
  • Size of remote file: 2.57 MB
figs/ufmg.png ADDED

Git LFS Details

  • SHA256: 1210e1ef2125305677de696645609625313d3575e9c73f78ddf0998da37f598a
  • Pointer size: 129 Bytes
  • Size of remote file: 7.61 kB
figs/verlab.png ADDED

Git LFS Details

  • SHA256: 12d6e4bfc62dc503311c927360d01b9c506df4df4853fad930fb6b5b9480e86a
  • Pointer size: 130 Bytes
  • Size of remote file: 16.2 kB
figs/xfeat.gif ADDED

Git LFS Details

  • SHA256: 1dff14e4d08150f0000735b88dd961d0b78983ffc00f2d32ff1a1a9d826c3a3d
  • Pointer size: 132 Bytes
  • Size of remote file: 3.2 MB
figs/xfeat_arq.png ADDED

Git LFS Details

  • SHA256: 071bf66baf111568ce8a2b27879f1f0b4b0e27552845db35ecd44c9d202cf5ab
  • Pointer size: 131 Bytes
  • Size of remote file: 191 kB
figs/xfeat_quali.jpg ADDED

Git LFS Details

  • SHA256: 8c5f7981649d80ab757d232bc755b459c396eff2a9f4572e393aecc834128588
  • Pointer size: 131 Bytes
  • Size of remote file: 165 kB
hubconf.py ADDED
@@ -0,0 +1,15 @@
+ dependencies = ['torch']
+ from modules.xfeat import XFeat as _XFeat
+ import torch
+ 
+ def XFeat(pretrained=True, top_k=4096):
+     """
+     XFeat model
+     pretrained (bool): load pretrained weights into the model
+     """
+     weights = None
+     if pretrained:
+         weights = torch.hub.load_state_dict_from_url("https://github.com/verlab/accelerated_features/raw/main/weights/xfeat.pt")
+ 
+     model = _XFeat(weights, top_k=top_k)
+     return model
minimal_example.py ADDED
@@ -0,0 +1,49 @@
+ """
+     "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
+     https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/
+ 
+     Minimal example of how to use XFeat.
+ """
+ 
+ import numpy as np
+ import os
+ import torch
+ import tqdm
+ 
+ from modules.xfeat import XFeat
+ 
+ os.environ['CUDA_VISIBLE_DEVICES'] = '' #Force CPU, comment for GPU
+ 
+ xfeat = XFeat()
+ 
+ #Random input
+ x = torch.randn(1,3,480,640)
+ 
+ #Simple inference with batch = 1
+ output = xfeat.detectAndCompute(x, top_k = 4096)[0]
+ print("----------------")
+ print("keypoints: ", output['keypoints'].shape)
+ print("descriptors: ", output['descriptors'].shape)
+ print("scores: ", output['scores'].shape)
+ print("----------------\n")
+ 
+ x = torch.randn(1,3,480,640)
+ # Stress test
+ for i in tqdm.tqdm(range(100), desc="Stress test on VGA resolution"):
+     output = xfeat.detectAndCompute(x, top_k = 4096)
+ 
+ # Batched mode
+ x = torch.randn(4,3,480,640)
+ outputs = xfeat.detectAndCompute(x, top_k = 4096)
+ print("# detected features on each batch item:", [len(o['keypoints']) for o in outputs])
+ 
+ # Match two images with sparse features
+ x1 = torch.randn(1,3,480,640)
+ x2 = torch.randn(1,3,480,640)
+ mkpts_0, mkpts_1 = xfeat.match_xfeat(x1, x2)
+ 
+ # Match two images with semi-dense approach -- batched mode with batch size 4
+ x1 = torch.randn(4,3,480,640)
+ x2 = torch.randn(4,3,480,640)
+ matches_list = xfeat.match_xfeat_star(x1, x2)
+ print(matches_list[0].shape)
modules/__init__.py ADDED
@@ -0,0 +1,4 @@
+ """
+     "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
+     https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/
+ """
modules/interpolator.py ADDED
@@ -0,0 +1,33 @@
+ """
+     "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
+     https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/
+ """
+ 
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ class InterpolateSparse2d(nn.Module):
+     """ Efficiently interpolate tensor at given sparse 2D positions. """
+     def __init__(self, mode = 'bicubic', align_corners = False):
+         super().__init__()
+         self.mode = mode
+         self.align_corners = align_corners
+ 
+     def normgrid(self, x, H, W):
+         """ Normalize coords to [-1,1]. """
+         return 2. * (x/(torch.tensor([W-1, H-1], device = x.device, dtype = x.dtype))) - 1.
+ 
+     def forward(self, x, pos, H, W):
+         """
+         Input
+             x: [B, C, H, W] feature tensor
+             pos: [B, N, 2] tensor of positions
+             H, W: int, original resolution of input 2d positions -- used in normalization [-1,1]
+ 
+         Returns
+             [B, N, C] sampled channels at 2d positions
+         """
+         grid = self.normgrid(pos, H, W).unsqueeze(-2).to(x.dtype)
+         x = F.grid_sample(x, grid, mode = self.mode, align_corners = False)
+         return x.permute(0,2,3,1).squeeze(-2)
modules/model.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
3
+ https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/
4
+ """
5
+
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import time
11
+
12
+ class BasicLayer(nn.Module):
13
+ """
14
+ Basic Convolutional Layer: Conv2d -> BatchNorm -> ReLU
15
+ """
16
+ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1, bias=False):
17
+ super().__init__()
18
+ self.layer = nn.Sequential(
19
+ nn.Conv2d( in_channels, out_channels, kernel_size, padding = padding, stride=stride, dilation=dilation, bias = bias),
20
+ nn.BatchNorm2d(out_channels, affine=False),
21
+ nn.ReLU(inplace = True),
22
+ )
23
+
24
+ def forward(self, x):
25
+ return self.layer(x)
26
+
27
+ class XFeatModel(nn.Module):
28
+ """
29
+ Implementation of architecture described in
30
+ "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
31
+ """
32
+
33
+ def __init__(self):
34
+ super().__init__()
35
+ self.norm = nn.InstanceNorm2d(1)
36
+
37
+
38
+ ########### ⬇️ CNN Backbone & Heads ⬇️ ###########
39
+
40
+ self.skip1 = nn.Sequential( nn.AvgPool2d(4, stride = 4),
41
+ nn.Conv2d (1, 24, 1, stride = 1, padding=0) )
42
+
43
+ self.block1 = nn.Sequential(
44
+ BasicLayer( 1, 4, stride=1),
45
+ BasicLayer( 4, 8, stride=2),
46
+ BasicLayer( 8, 8, stride=1),
47
+ BasicLayer( 8, 24, stride=2),
48
+ )
49
+
50
+ self.block2 = nn.Sequential(
51
+ BasicLayer(24, 24, stride=1),
52
+ BasicLayer(24, 24, stride=1),
53
+ )
54
+
55
+ self.block3 = nn.Sequential(
56
+ BasicLayer(24, 64, stride=2),
57
+ BasicLayer(64, 64, stride=1),
58
+ BasicLayer(64, 64, 1, padding=0),
59
+ )
60
+ self.block4 = nn.Sequential(
61
+ BasicLayer(64, 64, stride=2),
62
+ BasicLayer(64, 64, stride=1),
63
+ BasicLayer(64, 64, stride=1),
64
+ )
65
+
66
+ self.block5 = nn.Sequential(
67
+ BasicLayer( 64, 128, stride=2),
68
+ BasicLayer(128, 128, stride=1),
69
+ BasicLayer(128, 128, stride=1),
70
+ BasicLayer(128, 64, 1, padding=0),
71
+ )
72
+
73
+ self.block_fusion = nn.Sequential(
74
+ BasicLayer(64, 64, stride=1),
75
+ BasicLayer(64, 64, stride=1),
76
+ nn.Conv2d (64, 64, 1, padding=0)
77
+ )
78
+
79
+ self.heatmap_head = nn.Sequential(
80
+ BasicLayer(64, 64, 1, padding=0),
81
+ BasicLayer(64, 64, 1, padding=0),
82
+ nn.Conv2d (64, 1, 1),
83
+ nn.Sigmoid()
84
+ )
85
+
86
+
87
+ self.keypoint_head = nn.Sequential(
88
+ BasicLayer(64, 64, 1, padding=0),
89
+ BasicLayer(64, 64, 1, padding=0),
90
+ BasicLayer(64, 64, 1, padding=0),
91
+ nn.Conv2d (64, 65, 1),
92
+ )
93
+
94
+
95
+ ########### ⬇️ Fine Matcher MLP ⬇️ ###########
96
+
97
+ self.fine_matcher = nn.Sequential(
98
+ nn.Linear(128, 512),
99
+ nn.BatchNorm1d(512, affine=False),
100
+ nn.ReLU(inplace = True),
101
+ nn.Linear(512, 512),
102
+ nn.BatchNorm1d(512, affine=False),
103
+ nn.ReLU(inplace = True),
104
+ nn.Linear(512, 512),
105
+ nn.BatchNorm1d(512, affine=False),
106
+ nn.ReLU(inplace = True),
107
+ nn.Linear(512, 512),
108
+ nn.BatchNorm1d(512, affine=False),
109
+ nn.ReLU(inplace = True),
110
+ nn.Linear(512, 64),
111
+ )
112
+
113
+ def _unfold2d(self, x, ws = 2):
114
+ """
115
+ Unfolds tensor in 2D with desired ws (window size) and concat the channels
116
+ """
117
+ B, C, H, W = x.shape
118
+ x = x.unfold(2, ws , ws).unfold(3, ws,ws) \
119
+ .reshape(B, C, H//ws, W//ws, ws**2)
120
+ return x.permute(0, 1, 4, 2, 3).reshape(B, -1, H//ws, W//ws)
121
+
122
+
123
+ def forward(self, x):
124
+ """
125
+ input:
126
+ x -> torch.Tensor(B, C, H, W) grayscale or rgb images
127
+ return:
128
+ feats -> torch.Tensor(B, 64, H/8, W/8) dense local features
129
+ keypoints -> torch.Tensor(B, 65, H/8, W/8) keypoint logit map
130
+ heatmap -> torch.Tensor(B, 1, H/8, W/8) reliability map
131
+
132
+ """
133
+ #dont backprop through normalization
134
+ with torch.no_grad():
135
+ x = x.mean(dim=1, keepdim = True)
136
+ x = self.norm(x)
137
+
138
+ #main backbone
139
+ x1 = self.block1(x)
140
+ x2 = self.block2(x1 + self.skip1(x))
141
+ x3 = self.block3(x2)
142
+ x4 = self.block4(x3)
143
+ x5 = self.block5(x4)
144
+
145
+ #pyramid fusion
146
+ x4 = F.interpolate(x4, (x3.shape[-2], x3.shape[-1]), mode='bilinear')
147
+ x5 = F.interpolate(x5, (x3.shape[-2], x3.shape[-1]), mode='bilinear')
148
+ feats = self.block_fusion( x3 + x4 + x5 )
149
+
150
+ #heads
151
+ heatmap = self.heatmap_head(feats) # Reliability map
152
+ keypoints = self.keypoint_head(self._unfold2d(x, ws=8)) #Keypoint map logits
153
+
154
+ return feats, keypoints, heatmap
modules/xfeat.py ADDED
@@ -0,0 +1,346 @@
1
+
2
+ """
3
+ "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
4
+ https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/
5
+ """
6
+
7
+ import numpy as np
8
+ import os
9
+ import torch
10
+ import torch.nn.functional as F
11
+
12
+ import tqdm
13
+
14
+ from modules.model import *
15
+ from modules.interpolator import InterpolateSparse2d
16
+
17
+ class XFeat(nn.Module):
18
+ """
19
+ Implements the inference module for XFeat.
20
+ It supports inference for both sparse and semi-dense feature extraction & matching.
21
+ """
22
+
23
+ def __init__(self, weights = os.path.abspath(os.path.dirname(__file__)) + '/../weights/xfeat.pt', top_k = 4096):
24
+ super().__init__()
25
+ self.dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
26
+ self.net = XFeatModel().to(self.dev).eval()
27
+ self.top_k = top_k
28
+
29
+ if weights is not None:
30
+ if isinstance(weights, str):
31
+ print('loading weights from: ' + weights)
32
+ self.net.load_state_dict(torch.load(weights, map_location=self.dev))
33
+ else:
34
+ self.net.load_state_dict(weights)
35
+
36
+ self.interpolator = InterpolateSparse2d('bicubic')
37
+
38
+ @torch.inference_mode()
39
+ def detectAndCompute(self, x, top_k = None):
40
+ """
41
+ Compute sparse keypoints & descriptors. Supports batched mode.
42
+
43
+ input:
44
+ x -> torch.Tensor(B, C, H, W): grayscale or rgb image
45
+ top_k -> int: keep best k features
46
+ return:
47
+ List[Dict]:
48
+ 'keypoints' -> torch.Tensor(N, 2): keypoints (x,y)
49
+ 'scores' -> torch.Tensor(N,): keypoint scores
50
+ 'descriptors' -> torch.Tensor(N, 64): local features
51
+ """
52
+ if top_k is None: top_k = self.top_k
53
+ x, rh1, rw1 = self.preprocess_tensor(x)
54
+
55
+ B, _, _H1, _W1 = x.shape
56
+
57
+ M1, K1, H1 = self.net(x)
58
+ M1 = F.normalize(M1, dim=1)
59
+
60
+ #Convert logits to heatmap and extract kpts
61
+ K1h = self.get_kpts_heatmap(K1)
62
+ mkpts = self.NMS(K1h, threshold=0.05, kernel_size=5)
63
+
64
+ #Compute reliability scores
65
+ _nearest = InterpolateSparse2d('nearest')
66
+ _bilinear = InterpolateSparse2d('bilinear')
67
+ scores = (_nearest(K1h, mkpts, _H1, _W1) * _bilinear(H1, mkpts, _H1, _W1)).squeeze(-1)
68
+ scores[torch.all(mkpts == 0, dim=-1)] = -1
69
+
70
+ #Select top-k features
71
+ idxs = torch.argsort(-scores)
72
+ mkpts_x = torch.gather(mkpts[...,0], -1, idxs)[:, :top_k]
73
+ mkpts_y = torch.gather(mkpts[...,1], -1, idxs)[:, :top_k]
74
+ mkpts = torch.cat([mkpts_x[...,None], mkpts_y[...,None]], dim=-1)
75
+ scores = torch.gather(scores, -1, idxs)[:, :top_k]
76
+
77
+ #Interpolate descriptors at kpts positions
78
+ feats = self.interpolator(M1, mkpts, H = _H1, W = _W1)
79
+
80
+ #L2-Normalize
81
+ feats = F.normalize(feats, dim=-1)
82
+
83
+ #Correct kpt scale
84
+ mkpts = mkpts * torch.tensor([rw1,rh1], device=mkpts.device).view(1, 1, -1)
85
+
86
+ valid = scores > 0
87
+ return [
88
+ {'keypoints': mkpts[b][valid[b]],
89
+ 'scores': scores[b][valid[b]],
90
+ 'descriptors': feats[b][valid[b]]} for b in range(B)
91
+ ]
92
+
93
+ @torch.inference_mode()
94
+ def detectAndComputeDense(self, x, top_k = None, multiscale = True):
95
+ """
96
+ Compute dense *and coarse* descriptors. Supports batched mode.
97
+
98
+ input:
99
+ x -> torch.Tensor(B, C, H, W): grayscale or rgb image
100
+ top_k -> int: keep best k features
101
+ return: features sorted by their reliability score -- from most to least
102
+ List[Dict]:
103
+ 'keypoints' -> torch.Tensor(top_k, 2): coarse keypoints
104
+ 'scales' -> torch.Tensor(top_k,): extraction scale
105
+ 'descriptors' -> torch.Tensor(top_k, 64): coarse local features
106
+ """
107
+ if top_k is None: top_k = self.top_k
108
+ if multiscale:
109
+ mkpts, sc, feats = self.extract_dualscale(x, top_k)
110
+ else:
111
+ mkpts, feats = self.extractDense(x, top_k)
112
+ sc = torch.ones(mkpts.shape[:2], device=mkpts.device)
113
+
114
+ return {'keypoints': mkpts,
115
+ 'descriptors': feats,
116
+ 'scales': sc }
117
+
118
+ @torch.inference_mode()
119
+ def match_xfeat(self, img1, img2, top_k = None, min_cossim = -1):
120
+ """
121
+ Simple extractor and MNN matcher.
122
+ For simplicity it does not support batched mode due to possibly different number of kpts.
123
+ input:
124
+ img1 -> torch.Tensor (1,C,H,W) or np.ndarray (H,W,C): grayscale or rgb image.
125
+ img2 -> torch.Tensor (1,C,H,W) or np.ndarray (H,W,C): grayscale or rgb image.
126
+ top_k -> int: keep best k features
127
+ returns:
128
+ mkpts_0, mkpts_1 -> np.ndarray (N,2) xy coordinate matches from image1 to image2
129
+ """
130
+ if top_k is None: top_k = self.top_k
131
+ img1 = self.parse_input(img1)
132
+ img2 = self.parse_input(img2)
133
+
134
+ out1 = self.detectAndCompute(img1, top_k=top_k)[0]
135
+ out2 = self.detectAndCompute(img2, top_k=top_k)[0]
136
+
137
+ idxs0, idxs1 = self.match(out1['descriptors'], out2['descriptors'], min_cossim=min_cossim )
138
+
139
+ return out1['keypoints'][idxs0].cpu().numpy(), out2['keypoints'][idxs1].cpu().numpy()
140
+
141
+ @torch.inference_mode()
142
+ def match_xfeat_star(self, im_set1, im_set2, top_k = None):
143
+ """
144
+ Extracts coarse feats, then match pairs and finally refine matches, currently supports batched mode.
145
+ input:
146
+ im_set1 -> torch.Tensor(B, C, H, W) or np.ndarray (H,W,C): grayscale or rgb images.
147
+ im_set2 -> torch.Tensor(B, C, H, W) or np.ndarray (H,W,C): grayscale or rgb images.
148
+ top_k -> int: keep best k features
149
+ returns:
150
+ matches -> List[torch.Tensor(N, 4)]: List of size B containing tensor of pairwise matches (x1,y1,x2,y2)
151
+ """
152
+ if top_k is None: top_k = self.top_k
153
+ im_set1 = self.parse_input(im_set1)
154
+ im_set2 = self.parse_input(im_set2)
155
+
156
+ #Compute coarse feats
157
+ out1 = self.detectAndComputeDense(im_set1, top_k=top_k)
158
+ out2 = self.detectAndComputeDense(im_set2, top_k=top_k)
159
+
160
+ #Match batches of pairs
161
+ idxs_list = self.batch_match(out1['descriptors'], out2['descriptors'] )
162
+ B = len(im_set1)
163
+
164
+ #Refine coarse matches
165
+ #this part is harder to batch, currently iterate
166
+ matches = []
167
+ for b in range(B):
168
+ matches.append(self.refine_matches(out1, out2, matches = idxs_list, batch_idx=b))
169
+
170
+ return matches if B > 1 else (matches[0][:, :2].cpu().numpy(), matches[0][:, 2:].cpu().numpy())
171
+
172
+ def preprocess_tensor(self, x):
173
+ """ Guarantee that image is divisible by 32 to avoid aliasing artifacts. """
174
+ if isinstance(x, np.ndarray) and len(x.shape) == 3:
175
+ x = torch.tensor(x).permute(2,0,1)[None]
176
+ x = x.to(self.dev).float()
177
+
178
+ H, W = x.shape[-2:]
179
+ _H, _W = (H//32) * 32, (W//32) * 32
180
+ rh, rw = H/_H, W/_W
181
+
182
+ x = F.interpolate(x, (_H, _W), mode='bilinear', align_corners=False)
183
+ return x, rh, rw
184
+
185
+ def get_kpts_heatmap(self, kpts, softmax_temp = 1.0):
186
+ scores = F.softmax(kpts*softmax_temp, 1)[:, :64]
187
+ B, _, H, W = scores.shape
188
+ heatmap = scores.permute(0, 2, 3, 1).reshape(B, H, W, 8, 8)
189
+ heatmap = heatmap.permute(0, 1, 3, 2, 4).reshape(B, 1, H*8, W*8)
190
+ return heatmap
191
+
192
+ def NMS(self, x, threshold = 0.05, kernel_size = 5):
193
+ B, _, H, W = x.shape
194
+ pad=kernel_size//2
195
+ local_max = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=pad)(x)
196
+ pos = (x == local_max) & (x > threshold)
197
+ pos_batched = [k.nonzero()[..., 1:].flip(-1) for k in pos]
198
+
199
+ pad_val = max([len(x) for x in pos_batched])
200
+ pos = torch.zeros((B, pad_val, 2), dtype=torch.long, device=x.device)
201
+
202
+ #Pad kpts and build (B, N, 2) tensor
203
+ for b in range(len(pos_batched)):
204
+ pos[b, :len(pos_batched[b]), :] = pos_batched[b]
205
+
206
+ return pos
207
+
208
+ @torch.inference_mode()
209
+ def batch_match(self, feats1, feats2, min_cossim = -1):
210
+ B = len(feats1)
211
+ cossim = torch.bmm(feats1, feats2.permute(0,2,1))
212
+ match12 = torch.argmax(cossim, dim=-1)
213
+ match21 = torch.argmax(cossim.permute(0,2,1), dim=-1)
214
+
215
+ idx0 = torch.arange(len(match12[0]), device=match12.device)
216
+
217
+ batched_matches = []
218
+
219
+ for b in range(B):
220
+ mutual = match21[b][match12[b]] == idx0
221
+
222
+ if min_cossim > 0:
223
+ cossim_max, _ = cossim[b].max(dim=1)
224
+ good = cossim_max > min_cossim
225
+ idx0_b = idx0[mutual & good]
226
+ idx1_b = match12[b][mutual & good]
227
+ else:
228
+ idx0_b = idx0[mutual]
229
+ idx1_b = match12[b][mutual]
230
+
231
+ batched_matches.append((idx0_b, idx1_b))
232
+
233
+ return batched_matches
234
+
235
+ def subpix_softmax2d(self, heatmaps, temp = 3):
236
+ N, H, W = heatmaps.shape
237
+ heatmaps = torch.softmax(temp * heatmaps.view(-1, H*W), -1).view(-1, H, W)
238
+ x, y = torch.meshgrid(torch.arange(W, device = heatmaps.device ), torch.arange(H, device = heatmaps.device ), indexing = 'xy')
239
+ x = x - (W//2)
240
+ y = y - (H//2)
241
+
242
+ coords_x = (x[None, ...] * heatmaps)
243
+ coords_y = (y[None, ...] * heatmaps)
244
+ coords = torch.cat([coords_x[..., None], coords_y[..., None]], -1).view(N, H*W, 2)
245
+ coords = coords.sum(1)
246
+
247
+ return coords
248
+
249
+ def refine_matches(self, d0, d1, matches, batch_idx, fine_conf = 0.25):
250
+ idx0, idx1 = matches[batch_idx]
251
+ feats1 = d0['descriptors'][batch_idx][idx0]
252
+ feats2 = d1['descriptors'][batch_idx][idx1]
253
+ mkpts_0 = d0['keypoints'][batch_idx][idx0]
254
+ mkpts_1 = d1['keypoints'][batch_idx][idx1]
255
+ sc0 = d0['scales'][batch_idx][idx0]
256
+
257
+ #Compute fine offsets
258
+ offsets = self.net.fine_matcher(torch.cat([feats1, feats2],dim=-1))
259
+ conf = F.softmax(offsets*3, dim=-1).max(dim=-1)[0]
260
+ offsets = self.subpix_softmax2d(offsets.view(-1,8,8))
261
+
262
+ mkpts_0 += offsets* (sc0[:,None]) #*0.9 #* (sc0[:,None])
263
+
264
+ mask_good = conf > fine_conf
265
+ mkpts_0 = mkpts_0[mask_good]
266
+ mkpts_1 = mkpts_1[mask_good]
267
+
268
+ return torch.cat([mkpts_0, mkpts_1], dim=-1)
269
+
270
+ @torch.inference_mode()
271
+ def match(self, feats1, feats2, min_cossim = 0.82):
272
+
273
+ cossim = feats1 @ feats2.t()
274
+ cossim_t = feats2 @ feats1.t()
275
+
276
+ _, match12 = cossim.max(dim=1)
277
+ _, match21 = cossim_t.max(dim=1)
278
+
279
+ idx0 = torch.arange(len(match12), device=match12.device)
280
+ mutual = match21[match12] == idx0
281
+
282
+ if min_cossim > 0:
283
+ cossim, _ = cossim.max(dim=1)
284
+ good = cossim > min_cossim
285
+ idx0 = idx0[mutual & good]
286
+ idx1 = match12[mutual & good]
287
+ else:
288
+ idx0 = idx0[mutual]
289
+ idx1 = match12[mutual]
290
+
291
+ return idx0, idx1
292
+
293
+ def create_xy(self, h, w, dev):
294
+ y, x = torch.meshgrid(torch.arange(h, device = dev),
295
+ torch.arange(w, device = dev), indexing='ij')
296
+ xy = torch.cat([x[..., None],y[..., None]], -1).reshape(-1,2)
297
+ return xy
298
+
299
+ def extractDense(self, x, top_k = 8_000):
300
+ if top_k < 1:
301
+ top_k = 100_000_000
302
+
303
+ x, rh1, rw1 = self.preprocess_tensor(x)
304
+
305
+ M1, K1, H1 = self.net(x)
306
+
307
+ B, C, _H1, _W1 = M1.shape
308
+
309
+ xy1 = (self.create_xy(_H1, _W1, M1.device) * 8).expand(B,-1,-1)
310
+
311
+ M1 = M1.permute(0,2,3,1).reshape(B, -1, C)
312
+ H1 = H1.permute(0,2,3,1).reshape(B, -1)
313
+
314
+ _, top_k = torch.topk(H1, k = min(len(H1[0]), top_k), dim=-1)
315
+
316
+ feats = torch.gather( M1, 1, top_k[...,None].expand(-1, -1, 64))
317
+ mkpts = torch.gather(xy1, 1, top_k[...,None].expand(-1, -1, 2))
318
+ mkpts = mkpts * torch.tensor([rw1, rh1], device=mkpts.device).view(1,-1)
319
+
320
+ return mkpts, feats
321
+
322
+ def extract_dualscale(self, x, top_k, s1 = 0.6, s2 = 1.3):
323
+ x1 = F.interpolate(x, scale_factor=s1, align_corners=False, mode='bilinear')
324
+ x2 = F.interpolate(x, scale_factor=s2, align_corners=False, mode='bilinear')
325
+
326
+ B, _, _, _ = x.shape
327
+
328
+ mkpts_1, feats_1 = self.extractDense(x1, int(top_k*0.20))
329
+ mkpts_2, feats_2 = self.extractDense(x2, int(top_k*0.80))
330
+
331
+ mkpts = torch.cat([mkpts_1/s1, mkpts_2/s2], dim=1)
332
+ sc1 = torch.ones(mkpts_1.shape[:2], device=mkpts_1.device) * (1/s1)
333
+ sc2 = torch.ones(mkpts_2.shape[:2], device=mkpts_2.device) * (1/s2)
334
+ sc = torch.cat([sc1, sc2],dim=1)
335
+ feats = torch.cat([feats_1, feats_2], dim=1)
336
+
337
+ return mkpts, sc, feats
338
+
339
+ def parse_input(self, x):
340
+ if len(x.shape) == 3:
341
+ x = x[None, ...]
342
+
343
+ if isinstance(x, np.ndarray):
344
+ x = torch.tensor(x).permute(0,3,1,2)/255
345
+
346
+ return x
notebooks/minimal_example.ipynb ADDED
@@ -0,0 +1,256 @@
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "markdown",
19
+ "source": [
20
+ "#XFeat minimal inference example"
21
+ ],
22
+ "metadata": {
23
+ "id": "2tDj94al5GAJ"
24
+ }
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "source": [
29
+ "## Clone repository"
30
+ ],
31
+ "metadata": {
32
+ "id": "X8MPXBro5IFv"
33
+ }
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 1,
38
+ "metadata": {
39
+ "colab": {
40
+ "base_uri": "https://localhost:8080/"
41
+ },
42
+ "id": "tVkH1ChzNcLW",
43
+ "outputId": "da9a9474-76bd-4b66-8ecd-8ba0022f030e"
44
+ },
45
+ "outputs": [
46
+ {
47
+ "output_type": "stream",
48
+ "name": "stdout",
49
+ "text": [
50
+ "Cloning into 'accelerated_features'...\n",
51
+ "remote: Enumerating objects: 27, done.\u001b[K\n",
52
+ "remote: Counting objects: 100% (11/11), done.\u001b[K\n",
53
+ "remote: Compressing objects: 100% (10/10), done.\u001b[K\n",
54
+ "remote: Total 27 (delta 0), reused 5 (delta 0), pack-reused 16\u001b[K\n",
55
+ "Receiving objects: 100% (27/27), 13.29 MiB | 23.03 MiB/s, done.\n",
56
+ "Resolving deltas: 100% (1/1), done.\n",
57
+ "/content/accelerated_features\n"
58
+ ]
59
+ }
60
+ ],
61
+ "source": [
62
+ "!cd /content && git clone 'https://github.com/verlab/accelerated_features.git'\n",
63
+ "%cd /content/accelerated_features"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "markdown",
68
+ "source": [
69
+ "## Test on simple input (sparse setting)"
70
+ ],
71
+ "metadata": {
72
+ "id": "32T-WzfU5NRH"
73
+ }
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "source": [
78
+ "import numpy as np\n",
79
+ "import os\n",
80
+ "import torch\n",
81
+ "import tqdm\n",
82
+ "\n",
83
+ "from modules.xfeat import XFeat\n",
84
+ "\n",
85
+ "xfeat = XFeat()\n",
86
+ "\n",
87
+ "#Random input\n",
88
+ "x = torch.randn(1,3,480,640)\n",
89
+ "\n",
90
+ "#Simple inference with batch = 1\n",
91
+ "output = xfeat.detectAndCompute(x, top_k = 4096)[0]\n",
92
+ "print(\"----------------\")\n",
93
+ "print(\"keypoints: \", output['keypoints'].shape)\n",
94
+ "print(\"descriptors: \", output['descriptors'].shape)\n",
95
+ "print(\"scores: \", output['scores'].shape)\n",
96
+ "print(\"----------------\\n\")"
97
+ ],
98
+ "metadata": {
99
+ "colab": {
100
+ "base_uri": "https://localhost:8080/"
101
+ },
102
+ "id": "o1TMnCEfNfvD",
103
+ "outputId": "f59757f5-477a-4642-e955-7a5abefe3c21"
104
+ },
105
+ "execution_count": 2,
106
+ "outputs": [
107
+ {
108
+ "output_type": "stream",
109
+ "name": "stdout",
110
+ "text": [
111
+ "loading weights from: /content/accelerated_features/modules/../weights/xfeat.pt\n",
112
+ "----------------\n",
113
+ "keypoints: torch.Size([4096, 2])\n",
114
+ "descriptors: torch.Size([4096, 64])\n",
115
+ "scores: torch.Size([4096])\n",
116
+ "----------------\n",
117
+ "\n"
118
+ ]
119
+ }
120
+ ]
121
+ },
122
+ {
123
+ "cell_type": "markdown",
124
+ "source": [
125
+ "## Stress test to check FPS on VGA (sparse setting)"
126
+ ],
127
+ "metadata": {
128
+ "id": "8b9C09ya5UwL"
129
+ }
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "source": [
134
+ "x = torch.randn(1,3,480,640)\n",
135
+ "# Stress test\n",
136
+ "for i in tqdm.tqdm(range(100), desc=\"Stress test on VGA resolution\"):\n",
137
+ "\toutput = xfeat.detectAndCompute(x, top_k = 4096)\n"
138
+ ],
139
+ "metadata": {
140
+ "colab": {
141
+ "base_uri": "https://localhost:8080/"
142
+ },
143
+ "id": "Zsjz-QT95ZrM",
144
+ "outputId": "2df6f545-419f-4cc3-ad8b-bf5e12741dba"
145
+ },
146
+ "execution_count": 7,
147
+ "outputs": [
148
+ {
149
+ "output_type": "stream",
150
+ "name": "stderr",
151
+ "text": [
152
+ "Stress test on VGA resolution: 100%|██████████| 100/100 [00:14<00:00, 6.74it/s]\n"
153
+ ]
154
+ }
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "markdown",
159
+ "source": [
160
+ "## Test with batched mode (sparse)"
161
+ ],
162
+ "metadata": {
163
+ "id": "1jAl-ejS5du_"
164
+ }
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "source": [
169
+ "# Batched mode\n",
170
+ "x = torch.randn(4,3,480,640)\n",
171
+ "outputs = xfeat.detectAndCompute(x, top_k = 4096)\n",
172
+ "print(\"# detected features on each batch item:\", [len(o['keypoints']) for o in outputs])"
173
+ ],
174
+ "metadata": {
175
+ "colab": {
176
+ "base_uri": "https://localhost:8080/"
177
+ },
178
+ "id": "lAarS8UH5gyg",
179
+ "outputId": "883f13f8-3fac-48f2-c0a3-656a81b57f2c"
180
+ },
181
+ "execution_count": 4,
182
+ "outputs": [
183
+ {
184
+ "output_type": "stream",
185
+ "name": "stdout",
186
+ "text": [
187
+ "# detected features on each batch item: [4096, 4096, 4096, 4096]\n"
188
+ ]
189
+ }
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "markdown",
194
+ "source": [
195
+ "## Matches two images with built-in MNN matcher (sparse mode)"
196
+ ],
197
+ "metadata": {
198
+ "id": "H60iMAlh5nqP"
199
+ }
200
+ },
201
+ {
202
+ "cell_type": "code",
203
+ "source": [
204
+ "# Match two images with sparse features\n",
205
+ "x1 = torch.randn(1,3,480,640)\n",
206
+ "x2 = torch.randn(1,3,480,640)\n",
207
+ "mkpts_0, mkpts_1 = xfeat.match_xfeat(x1, x2)"
208
+ ],
209
+ "metadata": {
210
+ "id": "6N-ZqoMZ5syf"
211
+ },
212
+ "execution_count": 5,
213
+ "outputs": []
214
+ },
215
+ {
216
+ "cell_type": "markdown",
217
+ "source": [
218
+ "## Matches two images with semi-dense matching, and batched mode (batch size = 4) for demonstration purpose"
219
+ ],
220
+ "metadata": {
221
+ "id": "MOV4vZDp5v9_"
222
+ }
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "source": [
227
+ "# Create 4 image pairs\n",
228
+ "x1 = torch.randn(4,3,480,640)\n",
229
+ "x2 = torch.randn(4,3,480,640)\n",
230
+ "\n",
231
+ "#Obtain matches for each batch item\n",
232
+ "matches_list = xfeat.match_xfeat_star(x1, x2, top_k = 5000)\n",
233
+ "print('number of img pairs', len(matches_list))\n",
234
+ "print(matches_list[0].shape) # -> output is (x1,y1,x2,y2)"
235
+ ],
236
+ "metadata": {
237
+ "colab": {
238
+ "base_uri": "https://localhost:8080/"
239
+ },
240
+ "id": "Axe0o6U85zGV",
241
+ "outputId": "e1257959-24fc-4194-b2f1-ee06cf450b24"
242
+ },
243
+ "execution_count": 6,
244
+ "outputs": [
245
+ {
246
+ "output_type": "stream",
247
+ "name": "stdout",
248
+ "text": [
249
+ "number of img pairs 4\n",
250
+ "torch.Size([182, 4])\n"
251
+ ]
252
+ }
253
+ ]
254
+ }
255
+ ]
256
+ }
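The last cell returns matches as (x1, y1, x2, y2) rows; a common next step, and the one realtime_demo.py below performs, is robust homography estimation. A minimal sketch, assuming matches_list from the cell above and standard OpenCV calls:

import cv2
import numpy as np

matches = matches_list[0].cpu().numpy()   # (N, 4) rows of (x1, y1, x2, y2)
pts1, pts2 = matches[:, :2], matches[:, 2:]

# MAGSAC++ homography with the same thresholds the realtime demo uses.
H, inliers = cv2.findHomography(pts1, pts2, cv2.USAC_MAGSAC, 4.0,
                                maxIters=700, confidence=0.995)
print("inliers:", 0 if inliers is None else int(inliers.sum()))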
notebooks/xfeat_matching.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/xfeat_torch_hub.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
packages.txt ADDED
@@ -0,0 +1 @@
1
+ python3-opencv
realtime_demo.py ADDED
@@ -0,0 +1,295 @@
1
+ """
2
+ "XFeat: Accelerated Features for Lightweight Image Matching, CVPR 2024."
3
+ https://www.verlab.dcc.ufmg.br/descriptors/xfeat_cvpr24/
4
+
5
+ Real-time homography estimation demo. Note that the scene has to be planar, or the camera motion restricted to pure rotation, for the estimation to work properly.
6
+ """
7
+
8
+ import cv2
9
+ import numpy as np
10
+ import torch
11
+
12
+ from time import time, sleep
13
+ import argparse, sys, tqdm
14
+ import threading
15
+
16
+ from modules.xfeat import XFeat
17
+
18
+ def argparser():
19
+ parser = argparse.ArgumentParser(description="Configurations for the real-time matching demo.")
20
+ parser.add_argument('--width', type=int, default=640, help='Width of the video capture stream.')
21
+ parser.add_argument('--height', type=int, default=480, help='Height of the video capture stream.')
22
+ parser.add_argument('--max_kpts', type=int, default=3_000, help='Maximum number of keypoints.')
23
+ parser.add_argument('--method', type=str, choices=['ORB', 'SIFT', 'XFeat'], default='XFeat', help='Local feature detection method to use.')
24
+ parser.add_argument('--cam', type=int, default=0, help='Webcam device number.')
25
+ return parser.parse_args()
26
+
27
+
28
+ class FrameGrabber(threading.Thread):
29
+ def __init__(self, cap):
30
+ super().__init__()
31
+ self.cap = cap
32
+ _, self.frame = self.cap.read()
33
+ self.running = False
34
+
35
+ def run(self):
36
+ self.running = True
37
+ while self.running:
38
+ ret, frame = self.cap.read()
39
+ if not ret:
40
+ print("Can't receive frame (stream ended?).")
41
+ self.frame = frame
42
+ sleep(0.01)
43
+
44
+ def stop(self):
45
+ self.running = False
46
+ self.cap.release()
47
+
48
+ def get_last_frame(self):
49
+ return self.frame
50
+
51
+ class CVWrapper():
52
+ def __init__(self, mtd):
53
+ self.mtd = mtd
54
+ def detectAndCompute(self, x, mask=None):
55
+ return self.mtd.detectAndCompute(torch.tensor(x).permute(2,0,1).float()[None])[0]
56
+
57
+ class Method:
58
+ def __init__(self, descriptor, matcher):
59
+ self.descriptor = descriptor
60
+ self.matcher = matcher
61
+
62
+ def init_method(method, max_kpts):
63
+ if method == "ORB":
64
+ return Method(descriptor=cv2.ORB_create(max_kpts, fastThreshold=10), matcher=cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True))
65
+ elif method == "SIFT":
66
+ return Method(descriptor=cv2.SIFT_create(max_kpts, contrastThreshold=-1, edgeThreshold=1000), matcher=cv2.BFMatcher(cv2.NORM_L2, crossCheck=True))
67
+ elif method == "XFeat":
68
+ return Method(descriptor=CVWrapper(XFeat(top_k = max_kpts)), matcher=XFeat())
69
+ else:
70
+ raise RuntimeError("Invalid Method.")
71
+
72
+
73
+ class MatchingDemo:
74
+ def __init__(self, args):
75
+ self.args = args
76
+ self.cap = cv2.VideoCapture(args.cam)
77
+ self.width = args.width
78
+ self.height = args.height
79
+ self.ref_frame = None
80
+ self.ref_precomp = [[],[]]
81
+ self.corners = [[50, 50], [640-50, 50], [640-50, 480-50], [50, 480-50]]
82
+ self.current_frame = None
83
+ self.H = None
84
+ self.setup_camera()
85
+
86
+ #Init frame grabber thread
87
+ self.frame_grabber = FrameGrabber(self.cap)
88
+ self.frame_grabber.start()
89
+
90
+ #Homography params
91
+ self.min_inliers = 50
92
+ self.ransac_thr = 4.0
93
+
94
+ #FPS check
95
+ self.FPS = 0
96
+ self.time_list = []
97
+ self.max_cnt = 30 #avg FPS over this number of frames
98
+
99
+ #Set local feature method here -- we expect cv2 or Kornia convention
100
+ self.method = init_method(args.method, max_kpts=args.max_kpts)
101
+
102
+ # Setting up font for captions
103
+ self.font = cv2.FONT_HERSHEY_SIMPLEX
104
+ self.font_scale = 0.9
105
+ self.line_type = cv2.LINE_AA
106
+ self.line_color = (0,255,0)
107
+ self.line_thickness = 3
108
+
109
+ self.window_name = "Real-time matching - Press 's' to set the reference frame."
110
+
111
+ # Removes toolbar and status bar
112
+ cv2.namedWindow(self.window_name, flags=cv2.WINDOW_GUI_NORMAL)
113
+ # Set the window size
114
+ cv2.resizeWindow(self.window_name, self.width*2, self.height*2)
115
+ #Set Mouse Callback
116
+ cv2.setMouseCallback(self.window_name, self.mouse_callback)
117
+
118
+ def setup_camera(self):
119
+ self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.width)
120
+ self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.height)
121
+ self.cap.set(cv2.CAP_PROP_AUTO_EXPOSURE, 3)
122
+ #self.cap.set(cv2.CAP_PROP_EXPOSURE, 200)
123
+ self.cap.set(cv2.CAP_PROP_FPS, 30)
124
+
125
+ if not self.cap.isOpened():
126
+ print("Cannot open camera")
127
+ exit()
128
+
129
+ def draw_quad(self, frame, point_list):
130
+ if len(self.corners) > 1:
131
+ for i in range(len(self.corners) - 1):
132
+ cv2.line(frame, tuple(point_list[i]), tuple(point_list[i + 1]), self.line_color, self.line_thickness, lineType = self.line_type)
133
+ if len(self.corners) == 4: # Close the quadrilateral if 4 corners are defined
134
+ cv2.line(frame, tuple(point_list[3]), tuple(point_list[0]), self.line_color, self.line_thickness, lineType = self.line_type)
135
+
136
+ def mouse_callback(self, event, x, y, flags, param):
137
+ if event == cv2.EVENT_LBUTTONDOWN:
138
+ if len(self.corners) >= 4:
139
+ self.corners = [] # Reset corners if already 4 points were clicked
140
+ self.corners.append((x, y))
141
+
142
+ def putText(self, canvas, text, org, fontFace, fontScale, textColor, borderColor, thickness, lineType):
143
+ # Draw the border
144
+ cv2.putText(img=canvas, text=text, org=org, fontFace=fontFace, fontScale=fontScale,
145
+ color=borderColor, thickness=thickness+2, lineType=lineType)
146
+ # Draw the text
147
+ cv2.putText(img=canvas, text=text, org=org, fontFace=fontFace, fontScale=fontScale,
148
+ color=textColor, thickness=thickness, lineType=lineType)
149
+
150
+ def warp_points(self, points, H, x_offset = 0):
151
+ points_np = np.array(points, dtype='float32').reshape(-1,1,2)
152
+
153
+ warped_points_np = cv2.perspectiveTransform(points_np, H).reshape(-1, 2)
154
+ warped_points_np[:, 0] += x_offset
155
+ warped_points = warped_points_np.astype(int).tolist()
156
+
157
+ return warped_points
158
+
159
+ def create_top_frame(self):
160
+ top_frame_canvas = np.zeros((480, 1280, 3), dtype=np.uint8)
161
+ top_frame = np.hstack((self.ref_frame, self.current_frame))
162
+ color = (3, 186, 252)
163
+ cv2.rectangle(top_frame, (2, 2), (self.width*2-2, self.height-2), color, 5) # Orange color line as a separator
164
+ top_frame_canvas[0:self.height, 0:self.width*2] = top_frame
165
+
166
+ # Adding captions on the top frame canvas
167
+ self.putText(canvas=top_frame_canvas, text="Reference Frame:", org=(10, 30), fontFace=self.font,
168
+ fontScale=self.font_scale, textColor=(0,0,0), borderColor=color, thickness=1, lineType=self.line_type)
169
+
170
+ self.putText(canvas=top_frame_canvas, text="Target Frame:", org=(650, 30), fontFace=self.font,
171
+ fontScale=self.font_scale, textColor=(0,0,0), borderColor=color, thickness=1, lineType=self.line_type)
172
+
173
+ self.draw_quad(top_frame_canvas, self.corners)
174
+
175
+ return top_frame_canvas
176
+
177
+ def process(self):
178
+ # Create a blank canvas for the top frame
179
+ top_frame_canvas = self.create_top_frame()
180
+
181
+ # Match features and draw matches on the bottom frame
182
+ bottom_frame = self.match_and_draw(self.ref_frame, self.current_frame)
183
+
184
+ # Draw warped corners
185
+ if self.H is not None and len(self.corners) > 1:
186
+ self.draw_quad(top_frame_canvas, self.warp_points(self.corners, self.H, self.width))
187
+
188
+ # Stack top and bottom frames vertically on the final canvas
189
+ canvas = np.vstack((top_frame_canvas, bottom_frame))
190
+
191
+ cv2.imshow(self.window_name, canvas)
192
+
193
+ def match_and_draw(self, ref_frame, current_frame):
194
+
195
+ matches, good_matches = [], []
196
+ kp1, kp2 = [], []
197
+ points1, points2 = [], []
198
+
199
+ # Detect and compute features
200
+ if self.args.method in ['SIFT', 'ORB']:
201
+ kp1, des1 = self.ref_precomp
202
+ kp2, des2 = self.method.descriptor.detectAndCompute(current_frame, None)
203
+ else:
204
+ current = self.method.descriptor.detectAndCompute(current_frame)
205
+ kpts1, descs1 = self.ref_precomp['keypoints'], self.ref_precomp['descriptors']
206
+ kpts2, descs2 = current['keypoints'], current['descriptors']
207
+ idx0, idx1 = self.method.matcher.match(descs1, descs2, 0.82)
208
+ points1 = kpts1[idx0].cpu().numpy()
209
+ points2 = kpts2[idx1].cpu().numpy()
210
+
211
+ if len(kp1) > 10 and len(kp2) > 10 and self.args.method in ['SIFT', 'ORB']:
212
+ # Match descriptors
213
+ matches = self.method.matcher.match(des1, des2)
214
+
215
+ if len(matches) > 10:
216
+ points1 = np.zeros((len(matches), 2), dtype=np.float32)
217
+ points2 = np.zeros((len(matches), 2), dtype=np.float32)
218
+
219
+ for i, match in enumerate(matches):
220
+ points1[i, :] = kp1[match.queryIdx].pt
221
+ points2[i, :] = kp2[match.trainIdx].pt
222
+
223
+ if len(points1) > 10 and len(points2) > 10:
224
+ # Find homography
225
+ self.H, inliers = cv2.findHomography(points1, points2, cv2.USAC_MAGSAC, self.ransac_thr, maxIters=700, confidence=0.995)
226
+ inliers = inliers.flatten() > 0
227
+
228
+ if inliers.sum() < self.min_inliers:
229
+ self.H = None
230
+
231
+ if self.args.method in ["SIFT", "ORB"]:
232
+ good_matches = [m for i,m in enumerate(matches) if inliers[i]]
233
+ else:
234
+ kp1 = [cv2.KeyPoint(p[0],p[1], 5) for p in points1[inliers]]
235
+ kp2 = [cv2.KeyPoint(p[0],p[1], 5) for p in points2[inliers]]
236
+ good_matches = [cv2.DMatch(i,i,0) for i in range(len(kp1))]
237
+
238
+ # Draw matches
239
+ matched_frame = cv2.drawMatches(ref_frame, kp1, current_frame, kp2, good_matches, None, matchColor=(0, 200, 0), flags=2)
240
+
241
+ else:
242
+ matched_frame = np.hstack([ref_frame, current_frame])
243
+
244
+ color = (240, 89, 169)
245
+
246
+ # Add a colored rectangle to separate from the top frame
247
+ cv2.rectangle(matched_frame, (2, 2), (self.width*2-2, self.height-2), color, 5)
248
+
249
+ # Caption: number of matches, drawn on the matched (bottom) frame
250
+ self.putText(canvas=matched_frame, text="%s Matches: %d"%(self.args.method, len(good_matches)), org=(10, 30), fontFace=self.font,
251
+ fontScale=self.font_scale, textColor=(0,0,0), borderColor=color, thickness=1, lineType=self.line_type)
252
+
253
+ # Caption: registration FPS, drawn on the matched (bottom) frame
254
+ self.putText(canvas=matched_frame, text="FPS (registration): {:.1f}".format(self.FPS), org=(650, 30), fontFace=self.font,
255
+ fontScale=self.font_scale, textColor=(0,0,0), borderColor=color, thickness=1, lineType=self.line_type)
256
+
257
+ return matched_frame
258
+
259
+ def main_loop(self):
260
+ self.current_frame = self.frame_grabber.get_last_frame()
261
+ self.ref_frame = self.current_frame.copy()
262
+ self.ref_precomp = self.method.descriptor.detectAndCompute(self.ref_frame, None) #Cache ref features
263
+
264
+ while True:
265
+ if self.current_frame is None:
266
+ break
267
+
268
+ t0 = time()
269
+ self.process()
270
+
271
+ key = cv2.waitKey(1)
272
+ if key == ord('q'):
273
+ break
274
+ elif key == ord('s'):
275
+ self.ref_frame = self.current_frame.copy() # Update reference frame
276
+ self.ref_precomp = self.method.descriptor.detectAndCompute(self.ref_frame, None) #Cache ref features
277
+
278
+ self.current_frame = self.frame_grabber.get_last_frame()
279
+
280
+ #Measure avg. FPS
281
+ self.time_list.append(time()-t0)
282
+ if len(self.time_list) > self.max_cnt:
283
+ self.time_list.pop(0)
284
+ self.FPS = 1.0 / np.array(self.time_list).mean()
285
+
286
+ self.cleanup()
287
+
288
+ def cleanup(self):
289
+ self.frame_grabber.stop()
290
+ self.cap.release()
291
+ cv2.destroyAllWindows()
292
+
293
+ if __name__ == "__main__":
294
+ demo = MatchingDemo(args = argparser())
295
+ demo.main_loop()
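The demo is configured entirely through argparser(); it can be launched as python realtime_demo.py --method XFeat --cam 0, or programmatically with an equivalent namespace (a sketch; the field names mirror the parser arguments):

import argparse
from realtime_demo import MatchingDemo

# Defaults copied from argparser(); press 's' to set the reference frame, 'q' to quit.
args = argparse.Namespace(width=640, height=480, max_kpts=3000,
                          method='XFeat', cam=0)
MatchingDemo(args).main_loop()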
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ numpy
2
+ opencv-python-headless
3
+ tqdm
utils.py ADDED
@@ -0,0 +1,166 @@
1
+ # Copyright 2024 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Shared utility functions for OmniGlue."""
16
+
17
+ import cv2
18
+ import numpy as np
19
+
20
+ from typing import Optional
21
+
22
+
23
+ def visualize_matches(
24
+ image0: np.ndarray,
25
+ image1: np.ndarray,
26
+ kp0: np.ndarray,
27
+ kp1: np.ndarray,
28
+ match_matrix: np.ndarray,
29
+ match_labels: Optional[np.ndarray] = None,
30
+ show_keypoints: bool = False,
31
+ highlight_unmatched: bool = False,
32
+ title: Optional[str] = None,
33
+ line_width: int = 1,
34
+ circle_radius: int = 4,
35
+ circle_thickness: int = 2,
36
+ rng: Optional['np.random.Generator'] = None,
37
+ ):
38
+ """Generates visualization of keypoints and matches for two images.
39
+
40
+ Stacks image0 and image1 horizontally. In case the two images have different
41
+ heights, scales image1 (and its keypoints) to match image0's height. Note
42
+ that keypoints must be in (x, y) format, NOT (row, col). If match_matrix
43
+ includes unmatched dustbins, the dustbins will be removed before visualizing
44
+ matches.
45
+
46
+ Args:
47
+ image0: (H, W, 3) array containing image0 contents.
48
+ image1: (H, W, 3) array containing image1 contents.
49
+ kp0: (N, 2) array where each row represents (x, y) coordinates of keypoints
50
+ in image0.
51
+ kp1: (M, 2) array, where each row represents (x, y) coordinates of keypoints
52
+ in image1.
53
+ match_matrix: (N, M) binary array, where values are non-zero for keypoint
54
+ indices making up a match.
55
+ match_labels: (N, M) binary array, where values are non-zero for keypoint
56
+ indices making up a ground-truth match. When None, matches from
57
+ 'match_matrix' are colored randomly. Otherwise, matches from
58
+ 'match_matrix' are colored according to accuracy (compared to labels).
59
+ show_keypoints: if True, all image0 and image1 keypoints (including
60
+ unmatched ones) are visualized.
61
+ highlight_unmatched: if True, highlights unmatched keypoints in blue.
62
+ title: if not None, adds title text to top left of visualization.
63
+ line_width: width of correspondence line, in pixels.
64
+ circle_radius: radius of keypoint circles, if visualized.
65
+ circle_thickness: thickness of keypoint circles, if visualized.
66
+ rng: np random number generator to generate the line colors.
67
+
68
+ Returns:
69
+ Numpy array of image0 and image1 side-by-side, with lines between matches
70
+ according to match_matrix. If show_keypoints is True, keypoints from both
71
+ images are also visualized.
72
+ """
73
+ # initialize RNG
74
+ if rng is None:
75
+ rng = np.random.default_rng()
76
+
77
+ # Make copy of input param that may be modified in this function.
78
+ kp1 = np.copy(kp1)
79
+
80
+ # Detect unmatched dustbins.
81
+ has_unmatched_dustbins = (match_matrix.shape[0] == kp0.shape[0] + 1) and (
82
+ match_matrix.shape[1] == kp1.shape[0] + 1
83
+ )
84
+
85
+ # If necessary, resize image1 so that the pair can be stacked horizontally.
86
+ height0 = image0.shape[0]
87
+ height1 = image1.shape[0]
88
+ if height0 != height1:
89
+ scale_factor = height0 / height1
90
+ if scale_factor <= 1.0:
91
+ interp_method = cv2.INTER_AREA
92
+ else:
93
+ interp_method = cv2.INTER_LINEAR
94
+ new_dim1 = (int(image1.shape[1] * scale_factor), height0)
95
+ image1 = cv2.resize(image1, new_dim1, interpolation=interp_method)
96
+ kp1 *= scale_factor
97
+
98
+ # Create side-by-side image and add lines for all matches.
99
+ viz = cv2.hconcat([image0, image1])
100
+ w0 = image0.shape[1]
101
+ matches = np.argwhere(
102
+ match_matrix[:-1, :-1] if has_unmatched_dustbins else match_matrix
103
+ )
104
+ for match in matches:
105
+ pt0 = (int(kp0[match[0], 0]), int(kp0[match[0], 1]))
106
+ pt1 = (int(kp1[match[1], 0] + w0), int(kp1[match[1], 1]))
107
+ if match_labels is None:
108
+ color = tuple(rng.integers(0, 255, size=3).tolist())
109
+ else:
110
+ if match_labels[match[0], match[1]]:
111
+ color = (0, 255, 0)
112
+ else:
113
+ color = (255, 0, 0)
114
+ cv2.line(viz, pt0, pt1, color, line_width)
115
+
116
+ # Optionally, add circles to output image to represent each keypoint.
117
+ if show_keypoints:
118
+ for i in range(np.shape(kp0)[0]):
119
+ kp = kp0[i, :]
120
+ if highlight_unmatched and has_unmatched_dustbins and match_matrix[i, -1]:
121
+ cv2.circle(
122
+ viz,
123
+ tuple(kp.astype(np.int32).tolist()),
124
+ circle_radius,
125
+ (255, 0, 0),
126
+ circle_thickness,
127
+ )
128
+ else:
129
+ cv2.circle(
130
+ viz,
131
+ tuple(kp.astype(np.int32).tolist()),
132
+ circle_radius,
133
+ (0, 0, 255),
134
+ circle_thickness,
135
+ )
136
+ for j in range(np.shape(kp1)[0]):
137
+ kp = kp1[j, :]
138
+ kp[0] += w0
139
+ if highlight_unmatched and has_unmatched_dustbins and match_matrix[-1, j]:
140
+ cv2.circle(
141
+ viz,
142
+ tuple(kp.astype(np.int32).tolist()),
143
+ circle_radius,
144
+ (255, 0, 0),
145
+ circle_thickness,
146
+ )
147
+ else:
148
+ cv2.circle(
149
+ viz,
150
+ tuple(kp.astype(np.int32).tolist()),
151
+ circle_radius,
152
+ (0, 0, 255),
153
+ circle_thickness,
154
+ )
155
+ if title is not None:
156
+ viz = cv2.putText(
157
+ viz,
158
+ title,
159
+ (5, 30),
160
+ cv2.FONT_HERSHEY_SIMPLEX,
161
+ 1,
162
+ (0, 0, 255),
163
+ 2,
164
+ cv2.LINE_AA,
165
+ )
166
+ return viz
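visualize_matches expects a dense binary match_matrix rather than index pairs; a small sketch of bridging the two, where the helper name and the idx0/idx1, kp0/kp1, image0/image1 inputs are illustrative (e.g. index pairs from XFeat's match()):

import numpy as np
from utils import visualize_matches

def indices_to_match_matrix(idx0, idx1, n_kp0, n_kp1):
    """Binary (N, M) matrix with ones at matched keypoint index pairs."""
    m = np.zeros((n_kp0, n_kp1), dtype=np.uint8)
    m[np.asarray(idx0), np.asarray(idx1)] = 1
    return m

# kp0/kp1: (N, 2) and (M, 2) arrays of (x, y) keypoints; image0/image1: (H, W, 3) arrays.
match_matrix = indices_to_match_matrix(idx0, idx1, len(kp0), len(kp1))
viz = visualize_matches(image0, image1, kp0, kp1, match_matrix,
                        show_keypoints=True, title="XFeat matches")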
weights/xfeat.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5187fd7bedd26c7fe6acc9685444493a165a35ecc087b33c2db3627f3ea10b
3
+ size 6247949
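The file above is a Git LFS pointer rather than the checkpoint itself; after git lfs pull (or cloning with LFS enabled) it resolves to a regular PyTorch checkpoint of roughly 6.2 MB. A quick, non-authoritative sanity check:

import torch

# Inspect the downloaded checkpoint; requires the resolved LFS object, not the pointer file.
state = torch.load('weights/xfeat.pt', map_location='cpu')
print(type(state), len(state) if hasattr(state, '__len__') else '')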