diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md index e502dc22c88b7112fa1f9f2ce3fed0d0193f3a99..597e1b1c0a6a5affc1487a7e7c6b1d3f3ea03ef0 100644 --- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md @@ -11,7 +11,7 @@ assignees: '' [Please include a summary of the changes and the related issue. (Just overwrite this session directly)] -## Type of change +## Type of Change Please delete options that are not relevant. @@ -26,11 +26,17 @@ Please delete options that are not relevant. - [ ] Code and files are well organized. - [ ] All tests pass. - [ ] New code is covered by tests. +- [ ] The pull request is directed to the corresponding topic branch. - [ ] [Optional] We would be very happy if gitmoji :technologist: could be used to assist the commit message :speech_balloon:! ## Licensing: -By submitting this pull request, I confirm that my contribution is made under the MIT License. +By submitting this pull request, I confirm that: + +- [ ] My contribution is made under the MIT License. +- [ ] I have not included any code from questionable or non-compliant sources (GPL, AGPL, ... etc). +- [ ] I understand that all contributions to this repository must comply with the MIT License, and I promise that my contributions do not violate this license. +- [ ] I have not used any code or content from sources that conflict with the MIT License or are otherwise legally questionable. ## Additional Information diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index ac158d56c3a7153c755996c91f2ddf63f248a81b..3e265d827e4246e4db18a77a738b221621a3ade5 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -13,7 +13,7 @@ jobs: strategy: matrix: operating-system: [ubuntu-latest, macos-latest] - python-version: [3.8, '3.10', '3.12'] + python-version: [3.8, '3.10'] fail-fast: false steps: diff --git a/.github/workflows/develop.yaml b/.github/workflows/develop.yaml index a840a5f3737ec75abc40ac9c58930381fcc8c2ef..1a1c417b91a8861f410b11922d574ce8b8ad5301 100644 --- a/.github/workflows/develop.yaml +++ b/.github/workflows/develop.yaml @@ -23,7 +23,7 @@ jobs: uses: actions/cache@v2 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-3.10 restore-keys: | ${{ runner.os }}-pip- diff --git a/.readthedocs.yaml b/.readthedocs.yaml index d2a47f9b41e6b39fee5da1af192f2d61ed695fc4..8e3f9ba5f5b158e75737acbe95ebf94468a00d55 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,32 +1,13 @@ -# .readthedocs.yaml -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required version: 2 -# Set the OS, Python version and other tools you might need build: os: ubuntu-22.04 tools: python: "3.12" - # You can also specify other tool versions: - # nodejs: "19" - # rust: "1.64" - # golang: "1.19" - -# Build documentation in the "docs/" directory with Sphinx -# sphinx: -# configuration: docs/conf.py -# Optionally build your docs in additional formats such as PDF and ePub -# formats: -# - pdf -# - epub +sphinx: + configuration: docs/conf.py -# Optional but recommended, declare the Python requirements required -# to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -# python: -# install: -# - requirements: docs/requirements.txt 
+python: + install: + - requirements: docs/requirements.txt diff --git a/README.md b/README.md index 7d01bbf177905243bd425d504bbb7a5fc6d47e0c..44313d0506497e5942ba53a459a0e2985ce62895 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # YOLO: Official Implementation of YOLOv9, YOLOv7 +[![Documentation Status](https://readthedocs.org/projects/yolo-docs/badge/?version=latest)](https://yolo-docs.readthedocs.io/en/latest/?badge=latest) ![GitHub License](https://img.shields.io/github/license/WongKinYiu/YOLO) ![WIP](https://img.shields.io/badge/status-WIP-orange) -![](https://img.shields.io/github/actions/workflow/status/WongKinYiu/YOLO/deploy.yaml) +[![Developer Mode Build & Test](https://github.com/WongKinYiu/YOLO/actions/workflows/develop.yaml/badge.svg)](https://github.com/WongKinYiu/YOLO/actions/workflows/develop.yaml) +[![Deploy Mode Validation & Inference](https://github.com/WongKinYiu/YOLO/actions/workflows/deploy.yaml/badge.svg)](https://github.com/WongKinYiu/YOLO/actions/workflows/deploy.yaml) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/yolov9-learning-what-you-want-to-learn-using/real-time-object-detection-on-coco)](https://paperswithcode.com/sota/real-time-object-detection-on-coco) diff --git a/demo/hf_demo.py b/demo/hf_demo.py index 749e345258301ac5924d2f828b6106d28f4d951c..73ce1c5e0f69d3edd2194d4438cb5aa4cd555b49 100644 --- a/demo/hf_demo.py +++ b/demo/hf_demo.py @@ -11,7 +11,7 @@ from yolo import ( AugmentationComposer, NMSConfig, PostProccess, - Vec2Box, + create_converter, create_model, draw_bboxes, ) @@ -25,22 +25,22 @@ def load_model(model_name, device): model_cfg.model.auxiliary = {} model = create_model(model_cfg, True) model.to(device).eval() - return model + return model, model_cfg device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -model = load_model(DEFAULT_MODEL, device) -v2b = Vec2Box(model, IMAGE_SIZE, device) -class_list = OmegaConf.load("yolo/config/general.yaml").class_list +model, model_cfg = load_model(DEFAULT_MODEL, device) +converter = create_converter(model_cfg.name, model, model_cfg.anchor, IMAGE_SIZE, device) +class_list = OmegaConf.load("yolo/config/dataset/coco.yaml").class_list transform = AugmentationComposer([]) def predict(model_name, image, nms_confidence, nms_iou): - global DEFAULT_MODEL, model, device, v2b, class_list, post_proccess + global DEFAULT_MODEL, model, device, converter, class_list, post_proccess if model_name != DEFAULT_MODEL: - model = load_model(model_name, device) - v2b = Vec2Box(model, IMAGE_SIZE, device) + model, model_cfg = load_model(model_name, device) + converter = create_converter(model_cfg.name, model, model_cfg.anchor, IMAGE_SIZE, device) DEFAULT_MODEL = model_name image_tensor, _, rev_tensor = transform(image) @@ -49,7 +49,7 @@ def predict(model_name, image, nms_confidence, nms_iou): rev_tensor = rev_tensor.to(device)[None] nms_config = NMSConfig(nms_confidence, nms_iou) - post_proccess = PostProccess(v2b, nms_config) + post_proccess = PostProccess(converter, nms_config) with torch.no_grad(): predict = model(image_tensor) diff --git a/docs/0_get_start/0_quick_start.rst b/docs/0_get_start/0_quick_start.rst new file mode 100644 index 0000000000000000000000000000000000000000..62a79cfe912f8653a40b3db6662e397c7ed4ea4d --- /dev/null +++ b/docs/0_get_start/0_quick_start.rst @@ -0,0 +1,71 @@ +Quick Start +=========== + +.. note:: + We expect all customizations to be done primarily by passing arguments or modifying the YAML config files. 
+ If more detailed modifications are needed, custom content should be modularized as much as possible to avoid extensive code modifications. + +.. _QuickInstallYOLO: + +Install YOLO +------------ + +Clone the repository and install the dependencies: + +.. code-block:: bash + + git clone https://github.com/WongKinYiu/YOLO.git + cd YOLO + pip install -r requirements-dev.txt + # Make sure to work inside the cloned folder. + +Alternatively, If you are planning to make a simple change: + +**Note**: In the following examples, you should replace ``python yolo/lazy.py`` with ``yolo`` . + +.. code-block:: bash + + pip install git+https://github.com/WongKinYiu/YOLO.git + +**Note**: Most tasks already include at yolo/lazy.py, so you can run with this prefix and follow arguments: ``python yolo/lazy.py`` + + +Train Model +----------- + +To train the model, use the following command: + +.. code-block:: bash + + python yolo/lazy.py task=train + + yolo task=train # if installed via pip + +- Overriding the ``dataset`` parameter, you can customize your dataset via a dataset config. +- Overriding YOLO model by setting the ``model`` parameter to ``{v9-c, v9-m, ...}``. +- More details can be found at :ref:`Train Tutorials`. + +For example: + +.. code-block:: bash + + python yolo/lazy.py task=train dataset=AYamlFilePath model=v9-m + + yolo task=train dataset=AYamlFilePath model=v9-m # if installed via pip + +Inference & Deployment +------------------------ + +Inference is the default task of ``yolo/lazy.py``. To run inference and deploy the model, use: +More details can be found at :ref:`Inference Tutorials `. + +.. code-block:: bash + + python yolo/lazy.py task.data.source=AnySource + + yolo task.data.source=AnySource # if installed via pip + +You can enable fast inference modes by adding the parameter ``task.fast_inference={onnx, trt, deploy}``. + +- Theoretical acceleration following :ref:`YOLOv9 `. +- Hardware acceleration like :ref:`ONNX ` and :ref:`TensorRT `. for optimized deployment. diff --git a/docs/0_get_start/1_introduction.rst b/docs/0_get_start/1_introduction.rst new file mode 100644 index 0000000000000000000000000000000000000000..c7f7492bb5ed0997ee8cc5fa08f8a587e7c64cd0 --- /dev/null +++ b/docs/0_get_start/1_introduction.rst @@ -0,0 +1,66 @@ +What is YOLO +============ + +``YOLO`` (You Only Look Once) is a state-of-the-art, real-time object detection system. It is designed to predict bounding boxes and class probabilities for objects in an image with high accuracy and speed. YOLO models, including the latest YOLOv9, are known for their efficiency in detecting objects in a single forward pass through the network, making them highly suitable for real-time applications. + +YOLOv9 introduces improvements in both architecture and loss functions to enhance prediction accuracy and inference speed. + +Forward Process +--------------- + +The forward process of YOLOv9 can be visualized as follows: + +.. mermaid:: + + graph LR + subgraph YOLOv9 + Auxiliary + AP["Auxiliary Prediction"] + end + BackBone-->FPN; + FPN-->PAN; + PAN-->MP["Main Prediction"]; + BackBone-->Auxiliary; + Auxiliary-->AP; + +- **BackBone**: Extracts features from the input image. +- **FPN (Feature Pyramid Network)**: Aggregates features at different scales. +- **PAN (Region Proposal Network)**: Proposes regions of interest. +- **Main Prediction**: The primary detection output. +- **Auxiliary Prediction**: Additional predictions to assist the main prediction. 
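
As a rough illustration of the diagram above, the forward pass can be sketched in a few lines. This is a minimal sketch, not the repository's implementation; ``backbone``, ``fpn``, ``pan``, ``aux_branch``, and the two head callables are placeholder names for the corresponding blocks.

.. code-block:: python

    def forward_sketch(image, backbone, fpn, pan, main_head, aux_branch, aux_head):
        # Extract multi-scale features from the input image.
        features = backbone(image)
        # Aggregate the features across scales, then fuse them a second time.
        pyramid = fpn(features)
        fused = pan(pyramid)
        # Primary detection output used at inference time.
        main_prediction = main_head(fused)
        # Auxiliary predictions branch off the backbone and assist training.
        aux_prediction = aux_head(aux_branch(features))
        return main_prediction, aux_prediction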
+ +Loss Function +------------- + +The loss function of YOLOv9 combines several components to optimize the model's performance: + +.. mermaid:: + + flowchart LR + gtb-->cls + gtb["Ground Truth"]-->iou + pdm-.->cls["Max Class"] + pdm["Main Prediction"]-.->iou["Closest IoU"] + pdm-.->anc["box in anchor"] + cls-->gt + iou-->gt["Matched GT Box"] + anc-.->gt + + gt-->Liou["IoU Loss"] + pdm-->Liou + pdm-->Lbce + gt-->Lbce["BCE Loss"] + gt-->Ldfl["DFL Loss"] + pdm-->Ldfl + + Lbce-->ML + Liou-->ML + Ldfl-->ML["Total Loss"] + +- **Ground Truth**: The actual labels and bounding boxes in the dataset. +- **Main Prediction**: The model's predicted bounding boxes and class scores. +- **IoU (Intersection over Union)**: Measures the overlap between the predicted and ground truth boxes. +- **BCE (Binary Cross-Entropy) Loss**: Used for class prediction. +- **DFL (Distribution Focal Loss)**: Used for improving the precision of bounding box regression. + +By optimizing these components, YOLOv9 aims to achieve high accuracy and robustness in object detection tasks. diff --git a/docs/0_get_start/2_installations.rst b/docs/0_get_start/2_installations.rst new file mode 100644 index 0000000000000000000000000000000000000000..5aa11286b742e6cab1a9ec32210468bf61d4d403 --- /dev/null +++ b/docs/0_get_start/2_installations.rst @@ -0,0 +1,101 @@ +Install YOLO +============ + +This guide will help you set up YOLO on your machine. +We recommend starting with `GitHub Settings <#git-github>`_ for more flexible customization. +If you are planning to perform inference only or require a simple customization, you can choose to install via `PyPI <#pypi-pip-install>`_. + +Torch Requirements +------------------- + +The following table summarizes the torch requirements for different operating systems and hardware configurations: + + +.. tabs:: + + .. tab:: Linux + + .. tabs:: + + .. tab:: CUDA + + PyTorch: 1.12+ + + .. tab:: CPU + + PyTorch: 1.12+ + + .. tab:: MacOS + + .. tabs:: + + .. tab:: MPS + + PyTorch: 2.2+ + .. tab:: CPU + PyTorch: 2.2+ + .. tab:: Windows + + .. tabs:: + + .. tab:: CUDA + + [WIP] + + .. tab:: CPU + + [WIP] + + +Git & GitHub +------------ + +First, Clone the repository: + +.. code-block:: bash + + git clone https://github.com/WongKinYiu/YOLO.git + +Alternatively, you can directly download the repository via this `link `_. + +Next, install the required packages: + +.. code-block:: bash + + # For the minimal requirements, use: + pip install -r requirements.txt + # For a full installation, use: + pip install -r requirements-dev.txt + +Moreover, if you plan to utilize ONNX or TensorRT, please follow :ref:`ONNX`, :ref:`TensorRT` for more installation details. + +PyPI (pip install) +------------------ + +.. note:: + Due to the :guilabel:`yolo` this name already being occupied in the PyPI library, we are still determining the package name. + Currently, we provide an alternative way to install via the GitHub repository. Ensure your shell has `git` and `pip3` (or `pip`). + +To install YOLO via GitHub: + +.. code-block:: bash + + pip install git+https://github.com/WongKinYiu/YOLO.git + +Docker +------ + +To run YOLO using NVIDIA Docker, you can pull the Docker image and run it with GPU support: + +.. code-block:: bash + + docker pull henrytsui000/yolo + docker run --gpus all -it henrytsui000/yolo + +Make sure you have the NVIDIA Docker toolkit installed. For more details on setting up NVIDIA Docker, refer to the `NVIDIA Docker documentation `_. + + +Conda +----- + +We will publish it in the near future! 
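
Once the packages are installed, a quick sanity check is to compose the default configuration and build a model without loading any weights. This is a minimal sketch, assuming it is run from the repository root; the config path and the ``task=inference``/``model=v9-c`` overrides mirror the examples used elsewhere in these docs and may need adjusting for your setup.

.. code-block:: python

    from hydra import compose, initialize

    from yolo import create_model
    from yolo.config.config import Config

    # Compose the bundled configs (the path is relative to where this snippet runs).
    with initialize(config_path="yolo/config", version_base=None):
        cfg: Config = compose(config_name="config", overrides=["task=inference", "model=v9-c"])

    # weight_path=False builds the architecture without downloading pre-trained weights.
    model = create_model(cfg.model, class_num=cfg.dataset.class_num, weight_path=False)
    print(type(model).__name__)  # the model should construct without errors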
diff --git a/docs/1_tutorials/0_allIn1.rst b/docs/1_tutorials/0_allIn1.rst new file mode 100644 index 0000000000000000000000000000000000000000..044f7f56e0e1403c762153d116f0b9ed858e4d6d --- /dev/null +++ b/docs/1_tutorials/0_allIn1.rst @@ -0,0 +1,204 @@ +All In 1 +======== + +:file:`yolo.lazy` is a packaged file that includes :guilabel:`training`, :guilabel:`validation`, and :guilabel:`inference` tasks. +For detailed function documentation, thercheck out the IPython notebooks to learn how to import and use these function +the following section will break down operation inside of lazy, also supporting directly import/call the function. + +[TOC], setup, build, dataset, train, validation, inference +To train the model, you can run: + +Train Model +---------- + + +- batch size check / cuda +- training time / check +- build model / check +- dataset / check + +.. code-block:: bash + + python yolo/lazy.py task=train + +You can customize the training process by overriding the following common arguments: + +- ``name``: :guilabel:`str` + The experiment name. + +- ``model``: :guilabel:`str` + Model backbone, options include [model_zoo] v9-c, v7, v9-e, etc. + +- ``cpu_num``: :guilabel:`int` + Number of CPU workers (num_workers). + +- ``out_path``: :guilabel:`Path` + The output path for saving models and logs. + +- ``weight``: :guilabel:`Path | bool | None` + The path to pre-trained weights, False for training from scratch, None for default weights. + +- ``use_wandb``: :guilabel:`bool` + Whether to use Weights and Biases for experiment tracking. + +- ``use_TensorBoard``: :guilabel:`bool` + Whether to use TensorBoard for logging. + +- ``image_size``: :guilabel:`int | [int, int]` + The input image size. + +- ``+quiet``: :guilabel:`bool` + Optional, disable all output. + +- ``task.epoch``: :guilabel:`int` + Total number of training epochs. + +- ``task.data.batch_size``: :guilabel:`int` + The size of each batch (auto-batch sizing [WIP]). + +Examples +~~~~~~~~ + +To train a model with a specific batch size and image size, you can run: + +.. code-block:: bash + + python yolo/lazy.py task=train task.data.batch_size=12 image_size=1280 + +Multi-GPU Training with DDP +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For multi-GPU training, we use Distributed Data Parallel (DDP) for efficient and scalable training. +DDP enable training model with mutliple GPU, even the GPUs aren't on the same machine. For more details, you can refer to the `DDP tutorial `_. + +To train on multiple GPUs, replace the ``python`` command with ``torchrun --nproc_per_node=[GPU_NUM]``. The ``nproc_per_node`` argument specifies the number of GPUs to use. + + +.. tabs:: + + .. tab:: bash + .. code-block:: bash + + torchrun --nproc_per_node=2 yolo/lazy.py task=train device=[0,1] + + .. tab:: zsh + .. code-block:: bash + + torchrun --nproc_per_node=2 yolo/lazy.py task=train device=\[0,1\] + + +Training on a Custom Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To use the auto-download module, we suggest users construct the dataset config in the following format. +If the config files include `auto_download`, the model will automatically download the dataset when creating the dataloader. + +Here is an example dataset config file: + +.. literalinclude:: ../../yolo/config/dataset/dev.yaml + :language: YAML + +Both of the following formats are acceptable: + +- ``path``: :guilabel:`str` + The path to the dataset. + +- ``train, validation``: :guilabel:`str` + The training and validation directory names under `/images`. 
If using txt as ground truth, these should also be the names under `/labels/`. + +- ``class_num``: :guilabel:`int` + The number of dataset classes. + +- ``class_list``: :guilabel:`List[str]` + Optional, the list of class names, used only for visualizing the bounding box classes. + +- ``auto_download``: :guilabel:`dict` + Optional, whether to auto-download the dataset. + +The dataset should include labels or annotations, preferably in JSON format for compatibility with pycocotools during inference: + +.. code-block:: text + + DataSetName/ + ├── annotations + │ ├── train_json_name.json + │ └── val_json_name.json + ├── labels/ + │ ├── train/ + │ │ ├── AnyLabelName.txt + │ │ └── ... + │ └── validation/ + │ └── ... + └── images/ + ├── train/ + │ ├── AnyImageNameN.{png,jpg,jpeg} + │ └── ... + └── validation/ + └── ... + + +Validation Model +---------------- + +During training, this block will be auto-executed. You may also run this task manually to generate a JSON file representing the predictions for a given validation dataset. If the validation set includes JSON annotations, it will run pycocotools for evaluation. + +We recommend setting ``task.data.shuffle`` to False and turning off ``task.data.data_augment``. + +You can customize the validation process by overriding the following arguments: + +- ``task.nms.min_confidence``: :guilabel:`str` + The minimum confidence of model prediction. + +- ``task.nms.min_iou``: :guilabel:`str` + The minimum IoU threshold for NMS (Non-Maximum Suppression). + +Examples +~~~~~~~~ + +.. tabs:: + + .. tab:: git-cloned + .. code-block:: bash + + python yolo/lazy.py task=validation task.nms.min_iou=0.9 + + .. tab:: PyPI + .. code-block:: bash + + yolo task=validation task.nms.min_iou=0.9 + + +Model Inference +--------------- + +.. note:: + The ``dataset`` parameter shouldn't be overridden because the model requires the ``class_num`` of the dataset. If the classes have names, please provide the ``class_list``. + +You can customize the inference process by overriding the following arguments: + +- ``task.fast_inference``: :guilabel:`str` + Optional. Values can be `onnx`, `trt`, `deploy`, or `None`. `deploy` will detach the model auxiliary head. + +- ``task.data.source``: :guilabel:`str | Path | int` + This argument will be auto-resolved and could be a webcam ID, image folder path, video/image path. + +- ``task.nms.min_confidence``: :guilabel:`str` + The minimum confidence of model prediction. + +- ``task.nms.min_iou``: :guilabel:`str` + The minimum IoU threshold for NMS (Non-Maximum Suppression). + +Examples +~~~~~~~~ + +.. tabs:: + + .. tab:: git-cloned + .. code-block:: bash + + python yolo/lazy.py model=v9-m task.nms.min_confidence=0.1 task.data.source=0 task.fast_inference=onnx + + .. tab:: PyPI + .. code-block:: bash + + yolo model=v9-m task.nms.min_confidence=0.1 task.data.source=0 task.fast_inference=onnx diff --git a/docs/1_tutorials/1_setup.rst b/docs/1_tutorials/1_setup.rst new file mode 100644 index 0000000000000000000000000000000000000000..9b7dd5241d7783381552009ae82546f46af9a396 --- /dev/null +++ b/docs/1_tutorials/1_setup.rst @@ -0,0 +1,35 @@ +Setup Config +============ + +To set up your configuration, you will need to generate a configuration class based on :class:`~yolo.config.config.Config`, which can be achieved using `hydra `_. +The configuration will include all the necessary settings for your ``task``, including general configuration, ``dataset`` information, and task-specific information (``train``, ``inference``, ``validation``). 
+ +Next, create the progress logger to handle the output and progress bar. This class is based on `rich `_'s progress bar and customizes the logger (print function) using `loguru `_. + +.. tabs:: + + .. tab:: decorator + .. code-block:: python + + import hydra + from yolo import ProgressLogger + from yolo.config.config import Config + + @hydra.main(config_path="config", config_name="config", version_base=None) + def main(cfg: Config): + progress = ProgressLogger(cfg, exp_name=cfg.name) + pass + + .. tab:: initialize & compose + .. code-block:: python + + from hydra import compose, initialize + from yolo import ProgressLogger + from yolo.config.config import Config + + with initialize(config_path="config", version_base=None): + cfg = compose(config_name="config", overrides=["task=train", "model=v9-c"]) + + progress = ProgressLogger(cfg, exp_name=cfg.name) + +TODO: add a config over view diff --git a/docs/1_tutorials/2_buildmodel.rst b/docs/1_tutorials/2_buildmodel.rst new file mode 100644 index 0000000000000000000000000000000000000000..b6df32ab70e11d8154324a63bbfa2c2abecc1b76 --- /dev/null +++ b/docs/1_tutorials/2_buildmodel.rst @@ -0,0 +1,62 @@ +Build Model +=========== + +In YOLOv7, the prediction will be ``Anchor``, and in YOLOv9, it will predict ``Vector``. The converter will turn the bounding box to the vector. + +The overall model flowchart is as follows: + +.. mermaid:: + + flowchart LR + Input-->Model; + Model--Class-->NMS; + Model--Anc/Vec-->Converter; + Converter--Box-->NMS; + NMS-->Output; + +Load Model +~~~~~~~~~~ + +Using `create_model`, it will automatically create the :class:`~yolo.model.yolo.YOLO` model and load the provided weights. + +Arguments: + +- **model**: :class:`~yolo.config.config.ModelConfig` + The model configuration. +- **class_num**: :guilabel:`int` + The number of classes in the dataset, used for the YOLO's prediction head. +- **weight_path**: :guilabel:`Path | bool` + The path to the model weights. + - If `False`, weights are not loaded. + - If :guilabel:`True | None`, default weights are loaded. + - If a `Path`, the model weights are loaded from the specified path. + +.. code-block:: python + + model = create_model(cfg.model, class_num=cfg.dataset.class_num, weight_path=cfg.weight) + model = model.to(device) + +Deploy Model +~~~~~~~~~~~~ + +In the deployment version, we will remove the auxiliary branch of the model for fast inference. If the config includes ONNX and TensorRT, it will load/compile the model to ONNX or TensorRT format after removing the auxiliary branch. + +.. code-block:: python + + model = FastModelLoader(cfg).load_model(device) + +Autoload Converter +~~~~~~~~~~~~~~~~~~ + +Autoload the converter based on the model type (v7 or v9). + +Arguments: + +- **Model Name**: :guilabel:`str` + Used for choosing ``Vec2Box`` or ``Anc2Box``. +- **Anchor Config**: The anchor configuration, used to generate the anchor grid. +- **model**, **image_size**: Used for auto-detecting the anchor grid. + +.. code-block:: python + + converter = create_converter(cfg.model.name, model, cfg.model.anchor, cfg.image_size, device) diff --git a/docs/1_tutorials/3_dataset.rst b/docs/1_tutorials/3_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..1b49023e9c15de2c5ff2e06cccb2fd5be2cc35f7 --- /dev/null +++ b/docs/1_tutorials/3_dataset.rst @@ -0,0 +1,77 @@ +Create Dataset +============== + +In this section, we will prepare the dataset and create a dataloader. + +Overall, the dataloader can be created by: + +.. 
code-block:: python + + from yolo import create_dataloader + dataloader = create_dataloader(cfg.task.data, cfg.dataset, cfg.task.task, use_ddp) + +For inference, the dataset will be handled by :class:`~yolo.tools.data_loader.StreamDataLoader`, while for training and validation, it will be handled by :class:`~yolo.tools.data_loader.YoloDataLoader`. + +The input arguments are: + +- **DataConfig**: :class:`~yolo.config.config.DataConfig`, the relevant configuration for the dataloader. +- **DatasetConfig**: :class:`~yolo.config.config.DatasetConfig`, the relevant configuration for the dataset. +- **task_name**: :guilabel:`str`, the task name, which can be `inference`, `validation`, or `train`. +- **use_ddp**: :guilabel:`bool`, whether to use DDP (Distributed Data Parallel). Default is `False`. + +Train and Validation +---------------------------- + +Dataloader Return Type +~~~~~~~~~~~~~~~~~~~~~ + +For each iteration, the return type includes: + +- **batch_size**: the size of each batch, used to calculate batch average loss. +- **images**: the input images. +- **targets**: the ground truth of the images according to the task. + +Auto Download Dataset +~~~~~~~~~~~~~~~~~~~~~ + +The dataset will be auto-downloaded if the user provides the `auto_download` configuration. For example, if the configuration is as follows: + + +.. literalinclude:: ../../yolo/config/dataset/mock.yaml + :language: YAML + + +First, it will download and unzip the dataset from `{prefix}/{postfix}`, and verify that the dataset has `{file_num}` files. + +Once the dataset is verified, it will generate `{train, validation}.cache` in Tensor format, which accelerates the dataset preparation speed. + +Inference +----------------- + +In streaming mode, the model will infer the most recent frame and draw the bounding boxes by default, given the save flag to save the image. In other modes, it will save the predictions to `runs/inference/{exp_name}/outputs/` by default. + +Dataloader Return Type +~~~~~~~~~~~~~~~~~~~~~ + +For each iteration, the return type of `StreamDataLoader` includes: + +- **images**: tensor, the size of each batch, used to calculate batch average loss. +- **rev_tensor**: tensor, reverse tensor for reverting the bounding boxes and images to the input shape. +- **origin_frame**: tensor, the original input image. + +Input Type +~~~~~~~~~~ + +- **Stream Input**: + + - **webcam**: :guilabel:`int`, ID of the webcam, for example, 0, 1. + - **rtmp**: :guilabel:`str`, RTMP address. + +- **Single Source**: + + - **image**: :guilabel:`Path`, path to image files (`jpeg`, `jpg`, `png`, `tiff`). + - **video**: :guilabel:`Path`, path to video files (`mp4`). + +- **Folder**: + + - **folder of images**: :guilabel:`Path`, the relative or absolute path to the folder containing images. diff --git a/docs/1_tutorials/4_train.rst b/docs/1_tutorials/4_train.rst new file mode 100644 index 0000000000000000000000000000000000000000..e51f36c84a975c1d5536353a1b6547727d19c4b4 --- /dev/null +++ b/docs/1_tutorials/4_train.rst @@ -0,0 +1,55 @@ +Train & Validation +================== + +Training Model +---------------- + +To train a model, the :class:`~yolo.tools.solver.ModelTrainer` can help manage the training process. Initialize the :class:`~yolo.tools.solver.ModelTrainer` and use the :func:`~yolo.tools.solver.ModelTrainer.solve` function to start the training. + +Before starting the training, don't forget to start the progress logger to enable logging the process status. 
This will also enable `Weights & Biases (wandb) `_ or TensorBoard if configured. + +.. code-block:: python + + from yolo import ModelTrainer + solver = ModelTrainer(cfg, model, converter, progress, device, use_ddp) + progress.start() + solver.solve(dataloader) + +Training Diagram +~~~~~~~~~~~~~~~~ + +The following diagram illustrates the training process: + +.. mermaid:: + + flowchart LR + subgraph TS["trainer.solve"] + subgraph TE["train one epoch"] + subgraph "train one batch" + backpropagation-->TF[forward] + TF-->backpropagation + end + end + subgraph validator.solve + VC["calculate mAP"]-->VF[forward] + VF[forward]-->VC + end + end + TE-->validator.solve + validator.solve-->TE + +Validation Model +---------------- + +To validate the model performance, we follow a similar approach as the training process using :class:`~yolo.tools.solver.ModelValidator`. + +.. code-block:: python + + from yolo import ModelValidator + solver = ModelValidator(cfg, model, converter, progress, device, use_ddp) + progress.start() + solver.solve(dataloader) + +The :class:`~yolo.tools.solver.ModelValidator` class helps manage the validation process, ensuring that the model's performance is evaluated accurately. + +.. note:: The original training process already includes the validation phase. Call this separately if you want to run the validation again after the training is completed. diff --git a/docs/1_tutorials/5_inference.rst b/docs/1_tutorials/5_inference.rst new file mode 100644 index 0000000000000000000000000000000000000000..825ee34a0de2f0567c5e4da9d7f98472ed3a4e5c --- /dev/null +++ b/docs/1_tutorials/5_inference.rst @@ -0,0 +1,20 @@ +Inference +========== + + +Inference Video +--------------- + +Inference Image +--------------- +task: inference + +fast_inference: # onnx, trt, deploy or Empty +data: + source: demo/images/inference/image.png + image_size: ${image_size} + data_augment: {} +nms: + min_confidence: 0.5 + min_iou: 0.5 +# save_predict: True diff --git a/docs/2_model_zoo/0_object_detection.rst b/docs/2_model_zoo/0_object_detection.rst new file mode 100644 index 0000000000000000000000000000000000000000..04393162293b4a2f0aaf91708da42b2f146f08fe --- /dev/null +++ b/docs/2_model_zoo/0_object_detection.rst @@ -0,0 +1,169 @@ +Object Detection +================ + +YOLOv7 +~~~~~~ + + +.. list-table:: + :header-rows: 1 + + * - Model + - State + - Test Size + - :math:`AP^{val}` + - :math:`AP_{50}^{val}` + - :math:`AP_{75}^{val}` + - Param. + - FLOPs + * - `YOLOv7 `_ + - 🔧 + - 640 + - **51.4%** + - **69.7%** + - **55.9%** + - + - + * - `YOLOv7-X `_ + - 🔧 + - 640 + - **53.1%** + - **71.2%** + - **57.8%** + - + - + * - `YOLOv7-W6 `_ + - 🔧 + - 1280 + - **54.9%** + - **72.6%** + - **60.1%** + - + - + * - `YOLOv7-E6 `_ + - 🔧 + - 1280 + - **56.0%** + - **73.5%** + - **61.2%** + - + - + * - `YOLOv7-D6 `_ + - 🔧 + - 1280 + - **56.6%** + - **74.0%** + - **61.8%** + - + - + * - `YOLOv7-E6E `_ + - 🔧 + - 1280 + - **56.8%** + - **74.4%** + - **62.1%** + - + - + +YOLOv9 +~~~~~~ + +.. list-table:: + :header-rows: 1 + + * - Model + - State + - Test Size + - :math:`AP^{val}` + - :math:`AP_{50}^{val}` + - :math:`AP_{75}^{val}` + - Param. 
+ - FLOPs + * - `YOLOv9-T `_ + - 🔧 + - 640 + - + - + - + - + - + * - `YOLOv9-S `_ + - ✅ + - 640 + - **46.8%** + - **63.4%** + - **50.7%** + - **7.1M** + - **26.4G** + * - `YOLOv9-M `_ + - ✅ + - 640 + - **51.4%** + - **68.1%** + - **56.1%** + - **20.0M** + - **76.3G** + * - `YOLOv9-C `_ + - ✅ + - 640 + - **53.0%** + - **70.2%** + - **57.8%** + - **25.3M** + - **102.1G** + * - `YOLOv9-E `_ + - 🔧 + - 640 + - **55.6%** + - **72.8%** + - **60.6%** + - **57.3M** + - **189.0G** + + + + +.. mermaid:: + + graph LR + subgraph BackBone + B1-->B2; + B2-->B3; + B3-->B4; + B4-->B5; + end + + subgraph FPN + B3-->N3; + B4-->N4; + B5-->N5; + N5-->N4; + N4-->N3; + end + + subgraph PAN + P3-->P4; + P4-->P5; + N3-->P3; + N4-->P4; + N5-->P5; + end + + P3-->Main_Head; + P4-->Main_Head; + P5-->Main_Head; + + subgraph Aux + B3-->R3; + B4-->R4; + B5-->R5; + R3-->A3; + R4-->A3; + R4-->A4; + R5-->A3; + R5-->A4; + R5-->A5; + end + A3-->Auxiliary_Head; + A4-->Auxiliary_Head; + A5-->Auxiliary_Head; diff --git a/docs/2_model_zoo/1_segmentation.rst b/docs/2_model_zoo/1_segmentation.rst new file mode 100644 index 0000000000000000000000000000000000000000..3a7ae9dd02b4ad161d65071d5730db68412b2d51 --- /dev/null +++ b/docs/2_model_zoo/1_segmentation.rst @@ -0,0 +1,11 @@ +Segmentations +============= +.. _YOLOv7-seg: + +YOLOv7 +------ + +.. _YOLOv9-seg: + +YOLOv9 +------ diff --git a/docs/2_model_zoo/2_classification.rst b/docs/2_model_zoo/2_classification.rst new file mode 100644 index 0000000000000000000000000000000000000000..bc46c2a5d377717946634924822d6b775f66ef8d --- /dev/null +++ b/docs/2_model_zoo/2_classification.rst @@ -0,0 +1,4 @@ +Classification +============== + +[WIP] diff --git a/docs/3_custom/0_model.rst b/docs/3_custom/0_model.rst new file mode 100644 index 0000000000000000000000000000000000000000..7d068cfd4ac803632246ac2d81387798ba48a437 --- /dev/null +++ b/docs/3_custom/0_model.rst @@ -0,0 +1,12 @@ +Model +===== + +Modified Architecture +--------------------- + + + + + +Modified Model Module +--------------------- diff --git a/docs/3_custom/1_data_augment.rst b/docs/3_custom/1_data_augment.rst new file mode 100644 index 0000000000000000000000000000000000000000..f3809cecaf38d13a9e0da499629b12fe543137a8 --- /dev/null +++ b/docs/3_custom/1_data_augment.rst @@ -0,0 +1,4 @@ +.. _DataAugment: + +Data Augment +============ diff --git a/docs/3_custom/2_loss.rst b/docs/3_custom/2_loss.rst new file mode 100644 index 0000000000000000000000000000000000000000..19f4cdea2e7bb50b726f8795110072fb423a3299 --- /dev/null +++ b/docs/3_custom/2_loss.rst @@ -0,0 +1,2 @@ +Loss Function +============= diff --git a/docs/3_custom/3_task.rst b/docs/3_custom/3_task.rst new file mode 100644 index 0000000000000000000000000000000000000000..8285b8fc48ebf0c8dbde54cdbde1702061f61776 --- /dev/null +++ b/docs/3_custom/3_task.rst @@ -0,0 +1,2 @@ +Custom Task +=========== diff --git a/docs/4_deploy/1_deploy.rst b/docs/4_deploy/1_deploy.rst new file mode 100644 index 0000000000000000000000000000000000000000..e5be667134da442b958e83713d92eea8ec299dbd --- /dev/null +++ b/docs/4_deploy/1_deploy.rst @@ -0,0 +1,10 @@ +.. _Deploy: + +Deploy Model +============ + +Deploy YOLOv9 +------------- + +Deploy YOLOv7 +------------- diff --git a/docs/4_deploy/2_onnx.rst b/docs/4_deploy/2_onnx.rst new file mode 100644 index 0000000000000000000000000000000000000000..fa2dda12c50fffb7fa825812b59e3485f0476776 --- /dev/null +++ b/docs/4_deploy/2_onnx.rst @@ -0,0 +1,4 @@ +.. 
_ONNX: + +Compile to ONNX +=============== diff --git a/docs/4_deploy/3_tensorrt.rst b/docs/4_deploy/3_tensorrt.rst new file mode 100644 index 0000000000000000000000000000000000000000..2c899bb65538ee40fc3cb396d09226d6c6633a55 --- /dev/null +++ b/docs/4_deploy/3_tensorrt.rst @@ -0,0 +1,5 @@ +.. _TensorRT: + + +Compile to TensorRT +=================== diff --git a/docs/5_features/0_small_object.rst b/docs/5_features/0_small_object.rst new file mode 100644 index 0000000000000000000000000000000000000000..3d161e5b41b8f17f4f13beefbff70d0f8b57bf8e --- /dev/null +++ b/docs/5_features/0_small_object.rst @@ -0,0 +1,2 @@ +Small Object +============ diff --git a/docs/5_features/1_version_convert.rst b/docs/5_features/1_version_convert.rst new file mode 100644 index 0000000000000000000000000000000000000000..1751bfed2e2511ce31574995de15c9026e40aaed --- /dev/null +++ b/docs/5_features/1_version_convert.rst @@ -0,0 +1,2 @@ +Version Convert +=============== diff --git a/docs/5_features/2_IPython.rst b/docs/5_features/2_IPython.rst new file mode 100644 index 0000000000000000000000000000000000000000..b96d6344982abd166e2016e5dd270419fe6fdf34 --- /dev/null +++ b/docs/5_features/2_IPython.rst @@ -0,0 +1,2 @@ +IPython +======= diff --git a/docs/6_function_docs/0_solver.rst b/docs/6_function_docs/0_solver.rst new file mode 100644 index 0000000000000000000000000000000000000000..6a2f0c71f106c31da43253de2f53a21cbafb7c37 --- /dev/null +++ b/docs/6_function_docs/0_solver.rst @@ -0,0 +1,12 @@ +Solver +====== + +.. automodule:: yolo.tools.solver + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: yolo.utils.bounding_box_utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/6_function_docs/1_tools.rst b/docs/6_function_docs/1_tools.rst new file mode 100644 index 0000000000000000000000000000000000000000..bb14502f32de7193b55f08b9afbf562e24a808ab --- /dev/null +++ b/docs/6_function_docs/1_tools.rst @@ -0,0 +1,4 @@ +.. _Tools: + +Useful Tools +============ diff --git a/docs/6_function_docs/2_module.rst b/docs/6_function_docs/2_module.rst new file mode 100644 index 0000000000000000000000000000000000000000..99cbca4bab1eb45b848ea23696353706756a54c7 --- /dev/null +++ b/docs/6_function_docs/2_module.rst @@ -0,0 +1,4 @@ +.. _Module: + +Model Module +============ diff --git a/docs/6_function_docs/3_config.rst b/docs/6_function_docs/3_config.rst new file mode 100644 index 0000000000000000000000000000000000000000..fc1cd954d369d4308c5ce657719514366e7ec5fb --- /dev/null +++ b/docs/6_function_docs/3_config.rst @@ -0,0 +1,188 @@ +Config +====== + + + +.. autoclass:: yolo.config.config.Config + :members: + :undoc-members: + +.. automodule:: yolo.config.config + :members: + :undoc-members: + + + +.. mermaid:: + + classDiagram + class AnchorConfig { + List~int~ strides + Optional~int~ reg_max + Optional~int~ anchor_num + List~List~int~~ anchor + } + + class LayerConfig { + Dict args + Union~List~int~~ source + str tags + } + + class BlockConfig { + List~Dict~LayerConfig~~ block + } + + class ModelConfig { + Optional~str~ name + AnchorConfig anchor + Dict~BlockConfig~ model + } + + AnchorConfig --> ModelConfig + LayerConfig --> BlockConfig + BlockConfig --> ModelConfig + +.. 
mermaid:: + + classDiagram + class DownloadDetail { + str url + int file_size + } + + class DownloadOptions { + Dict~DownloadDetail~ details + } + + class DatasetConfig { + str path + int class_num + List~str~ class_list + Optional~DownloadOptions~ auto_download + } + + class DataConfig { + bool shuffle + int batch_size + bool pin_memory + int cpu_num + List~int~ image_size + Dict~int~ data_augment + Optional~Union~str~~ source + } + + DownloadDetail --> DownloadOptions + DownloadOptions --> DatasetConfig + +.. mermaid:: + + classDiagram + class OptimizerArgs { + float lr + float weight_decay + } + + class OptimizerConfig { + str type + OptimizerArgs args + } + + class MatcherConfig { + str iou + int topk + Dict~str~ factor + } + + class LossConfig { + Dict~str~ objective + Union~bool~ aux + MatcherConfig matcher + } + + class SchedulerConfig { + str type + Dict~str~ warmup + Dict~str~ args + } + + class EMAConfig { + bool enabled + float decay + } + + class TrainConfig { + str task + int epoch + DataConfig data + OptimizerConfig optimizer + LossConfig loss + SchedulerConfig scheduler + EMAConfig ema + ValidationConfig validation + } + + class NMSConfig { + int min_confidence + int min_iou + } + + class InferenceConfig { + str task + NMSConfig nms + DataConfig data + Optional~None~ fast_inference + bool save_predict + } + + class ValidationConfig { + str task + NMSConfig nms + DataConfig data + } + + OptimizerArgs --> OptimizerConfig + OptimizerConfig --> TrainConfig + MatcherConfig --> LossConfig + LossConfig --> TrainConfig + SchedulerConfig --> TrainConfig + EMAConfig --> TrainConfig + NMSConfig --> InferenceConfig + NMSConfig --> ValidationConfig + + +.. mermaid:: + + classDiagram + class GeneralConfig { + str name + Union~str~ device + int cpu_num + List~int~ class_idx_id + List~int~ image_size + str out_path + bool exist_ok + int lucky_number + bool use_wandb + bool use_TensorBoard + Optional~str~ weight + } + +.. mermaid:: + + classDiagram + class Config { + Union~ValidationConfig~ task + DatasetConfig dataset + ModelConfig model + GeneralConfig model + } + + DatasetConfig --> Config + DataConfig --> TrainConfig + DataConfig --> InferenceConfig + DataConfig --> ValidationConfig + InferenceConfig --> Config + ValidationConfig --> Config + TrainConfig --> Config + GeneralConfig --> Config diff --git a/docs/6_function_docs/4_dataloader.rst b/docs/6_function_docs/4_dataloader.rst new file mode 100644 index 0000000000000000000000000000000000000000..dd8e5ff58170049b0b46ae63f127dc99119827d4 --- /dev/null +++ b/docs/6_function_docs/4_dataloader.rst @@ -0,0 +1,8 @@ +Dataloader +========== + + + +.. automodule:: yolo.tools.data_loader + :members: + :undoc-members: diff --git a/docs/MODELS.md b/docs/MODELS.md deleted file mode 100644 index ff2ea0439687d16c2aae726cdf08da7bc30d1b68..0000000000000000000000000000000000000000 --- a/docs/MODELS.md +++ /dev/null @@ -1,30 +0,0 @@ -# YOLO Model Zoo - -Welcome to the YOLOv9 Model Zoo! Here, you will find a variety of pre-trained models tailored to different use cases and performance needs. Each model comes with detailed information about its training regime, performance metrics, and usage instructions. - -## Standard Models - -These models are trained on common datasets like COCO and provide a balance between speed and accuracy. - - -| Model | Support? |Test Size | APval | AP50val | AP75val | Param. 
| FLOPs | -| :-- | :-: | :-: | :-: | :-: | :-: | :-: | :-: | -| [**YOLOv9-S**]() |✅ | 640 | **46.8%** | **63.4%** | **50.7%** | **7.1M** | **26.4G** | -| [**YOLOv9-M**]() |✅ | 640 | **51.4%** | **68.1%** | **56.1%** | **20.0M** | **76.3G** | -| [**YOLOv9-C**]() |✅ | 640 | **53.0%** | **70.2%** | **57.8%** | **25.3M** | **102.1G** | -| [**YOLOv9-E**]() | 🔧 | 640 | **55.6%** | **72.8%** | **60.6%** | **57.3M** | **189.0G** | -| | | | | | | | -| [**YOLOv7**]() |🔧 | 640 | **51.4%** | **69.7%** | **55.9%** | -| [**YOLOv7-X**]() |🔧 | 640 | **53.1%** | **71.2%** | **57.8%** | -| [**YOLOv7-W6**]() | 🔧 | 1280 | **54.9%** | **72.6%** | **60.1%** | -| [**YOLOv7-E6**]() | 🔧 | 1280 | **56.0%** | **73.5%** | **61.2%** | -| [**YOLOv7-D6**]() | 🔧 | 1280 | **56.6%** | **74.0%** | **61.8%** | -| [**YOLOv7-E6E**]() | 🔧 | 1280 | **56.8%** | **74.4%** | **62.1%** | - -## Download and Usage Instructions - -To use these models, download them from the links provided and use the following command to run detection: - -```bash -$yolo detect weights=path/to/model.pt img=640 conf=0.25 source=your_image.jpg -``` diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d4bb2cbb9eddb1bb1b4f366623044af8e4830919 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..e288a2abaa65c7e8dbfdf643975f46706ec129c0 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,50 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "YOLO-docs" +copyright = "2024, Kin-Yiu, Wong and Hao-Tang, Tsui" +author = "Kin-Yiu, Wong and Hao-Tang, Tsui" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +import os +import sys + +sys.path.insert(0, os.path.abspath("..")) + +extensions = [ + "sphinx_rtd_theme", + "sphinx_tabs.tabs", + "sphinxcontrib.mermaid", + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "linuxdoc.rstFlatTable", + "myst_parser", +] + +myst_enable_extensions = [ + "dollarmath", + "amsmath", + "deflist", +] +html_theme = "sphinx_rtd_theme" +html_theme_options = { + "sticky_navigation": False, +} + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_static_path = ["_static"] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..6d7cc6d64764bee89f8d81aad9d6fa0417ea0433 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,90 @@ +YOLO documentation +======================= + +Introduction +------------ + +YOLO (You Only Look Once) is a state-of-the-art, real-time object detection system that is designed for both efficiency and accuracy. This documentation provides comprehensive guidance on how to set up, configure, and effectively use YOLO for object detection tasks. + +**Note: This project and some sections of this documentation are currently a work in progress.** + +Project Features +---------------- + +- **Real-time Processing**: YOLO can process images in real-time with high accuracy, making it suitable for applications that require instant detection. +- **Multitasking Capabilities**: Our enhanced version of YOLO supports multitasking, allowing it to handle multiple object detection tasks simultaneously. +- **Open Source**: YOLO is open source, released under the MIT License, encouraging a broad community of developers to contribute and build upon the existing framework. + +Documentation Contents +---------------------- + +Explore our documentation: + + +.. toctree:: + :maxdepth: 1 + :caption: Get Started + + 0_get_start/0_quick_start + 0_get_start/1_introduction + 0_get_start/2_installations + +.. toctree:: + :maxdepth: 1 + :caption: Tutorials + + 1_tutorials/0_allIn1 + 1_tutorials/1_setup + 1_tutorials/2_buildmodel + 1_tutorials/3_dataset + 1_tutorials/4_train + 1_tutorials/5_inference + + +.. toctree:: + :maxdepth: 1 + :caption: Model Zoo + + 2_model_zoo/0_object_detection + 2_model_zoo/1_segmentation + 2_model_zoo/2_classification + +.. toctree:: + :maxdepth: 1 + :caption: Custom YOLO + + 3_custom/0_model + 3_custom/1_data_augment + 3_custom/2_loss + 3_custom/3_task + + +.. toctree:: + :maxdepth: 1 + :caption: Deploy + + 4_deploy/1_deploy + 4_deploy/2_onnx + 4_deploy/3_tensorrt + + +.. toctree:: + :maxdepth: 1 + :caption: Features + + 5_features/0_small_object + 5_features/1_version_convert + 5_features/2_IPython + +.. 
toctree:: + :maxdepth: 1 + :caption: Function Docs + + 6_function_docs/0_solver + 6_function_docs/1_tools + 6_function_docs/2_module + +License +------- + +YOLO is provided under the MIT License, which allows extensive freedom for reuse and distribution. See the LICENSE file for full license text. diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..32bb24529f92346af26219baed295b7488b77534 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fc1a16d266cdac3d46a0c9a2c40a1d51c40d6f1d --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,6 @@ +myst-parser +linuxdoc +sphinx +sphinx-tabs +sphinx_rtd_theme +sphinxcontrib-mermaid diff --git a/examples/notebook_TensorRT.ipynb b/examples/notebook_TensorRT.ipynb index 87ff34669f9d2b1508a2443dc9f3ba35617b7040..b75e031074d0be04b68d12013a2612ea5fa30fc7 100644 --- a/examples/notebook_TensorRT.ipynb +++ b/examples/notebook_TensorRT.ipynb @@ -18,7 +18,15 @@ "project_root = Path().resolve().parent\n", "sys.path.append(str(project_root))\n", "\n", - "from yolo import AugmentationComposer, bbox_nms, create_model, custom_logger, draw_bboxes, Vec2Box\n", + "from yolo import (\n", + " AugmentationComposer, \n", + " bbox_nms, \n", + " create_model, \n", + " custom_logger, \n", + " create_converter,\n", + " draw_bboxes, \n", + " Vec2Box\n", + ")\n", "from yolo.config.config import NMSConfig" ] }, @@ -49,6 +57,8 @@ "metadata": {}, "outputs": [], "source": [ + "with open(MODEL_CONFIG) as stream:\n", + " cfg_model = OmegaConf.load(stream)\n", "if os.path.exists(TRT_WEIGHT_PATH):\n", " from torch2trt import TRTModule\n", "\n", @@ -57,8 +67,6 @@ "else:\n", " from torch2trt import torch2trt\n", "\n", - " with open(MODEL_CONFIG) as stream:\n", - " cfg_model = OmegaConf.load(stream)\n", "\n", " model = create_model(cfg_model, weight_path=WEIGHT_PATH)\n", " model = model.to(device).eval()\n", @@ -70,7 +78,7 @@ " logger.info(f\"📥 TensorRT model saved to oonx.pt\")\n", "\n", "transform = AugmentationComposer([], IMAGE_SIZE)\n", - "vec2box = Vec2Box(model_trt, IMAGE_SIZE, device)\n" + "converter = create_converter(cfg_model.name, model_trt, cfg_model.anchor, IMAGE_SIZE, device)\n" ] }, { @@ -79,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "image, bbox = transform(image, torch.zeros(0, 5))\n", + "image, bbox, rev_tensor = transform(image, torch.zeros(0, 5))\n", "image = image.to(device)[None]" ] }, @@ -91,7 +99,7 @@ "source": [ "with torch.no_grad():\n", " predict = model_trt(image)\n", - " predict = vec2box(predict[\"Main\"])\n", + " predict = converter(predict[\"Main\"])\n", "predict_box = 
bbox_nms(predict[0], predict[2], NMSConfig(0.5, 0.5))\n", "draw_bboxes(image, predict_box)" ] @@ -122,7 +130,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.1.undefined" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/examples/notebook_inference.ipynb b/examples/notebook_inference.ipynb index 8bb3cc6f55e63577cf8bafd8d639b3d042dd0d2c..ddb7dcf6d60b60f1bef2eccc2ef81f267122221d 100644 --- a/examples/notebook_inference.ipynb +++ b/examples/notebook_inference.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -35,7 +45,7 @@ "source": [ "CONFIG_PATH = \"../yolo/config\"\n", "CONFIG_NAME = \"config\"\n", - "MODEL = \"v7-base\"\n", + "MODEL = \"v9-c\"\n", "\n", "DEVICE = 'cuda:0'\n", "CLASS_NUM = 80\n", @@ -54,7 +64,9 @@ "with initialize(config_path=CONFIG_PATH, version_base=None, job_name=\"notebook_job\"):\n", " cfg: Config = compose(config_name=CONFIG_NAME, overrides=[\"task=inference\", f\"task.data.source={IMAGE_PATH}\", f\"model={MODEL}\"])\n", " model = create_model(cfg.model, class_num=CLASS_NUM).to(device)\n", + "\n", " transform = AugmentationComposer([], cfg.image_size)\n", + "\n", " converter = create_converter(cfg.model.name, model, cfg.model.anchor, cfg.image_size, device)\n", " post_proccess = PostProccess(converter, cfg.task.nms)" ] @@ -81,7 +93,7 @@ " predict = model(image)\n", " pred_bbox = post_proccess(predict, rev_tensor)\n", "\n", - "draw_bboxes(pil_image, pred_bbox, idx2label=cfg.class_list)" + "draw_bboxes(pil_image, pred_bbox, idx2label=cfg.dataset.class_list)" ] }, { @@ -92,6 +104,11 @@ "\n", "![image](../demo/images/output/visualize.png)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/examples/notebook_smallobject.ipynb b/examples/notebook_smallobject.ipynb index c025b526b2863d04d3438127d43fd4d6789b7198..c2f75ab2b6a1a4e29f3277cdc7da6b4931b131a3 100644 --- a/examples/notebook_smallobject.ipynb +++ b/examples/notebook_smallobject.ipynb @@ -30,7 +30,17 @@ "project_root = Path().resolve().parent\n", "sys.path.append(str(project_root))\n", "\n", - "from yolo import AugmentationComposer, bbox_nms, Config, create_model, custom_logger, draw_bboxes, Vec2Box, NMSConfig, PostProccess" + "from yolo import (\n", + " AugmentationComposer, \n", + " Config, \n", + " NMSConfig, \n", + " PostProccess,\n", + " bbox_nms, \n", + " create_model, \n", + " create_converter, \n", + " custom_logger, \n", + " draw_bboxes, \n", + ")" ] }, { @@ -62,8 +72,8 @@ " cfg: Config = compose(config_name=CONFIG_NAME, overrides=[\"task=inference\", f\"task.data.source={IMAGE_PATH}\", f\"model={MODEL}\"])\n", " model = create_model(cfg.model, class_num=CLASS_NUM).to(device)\n", " transform = AugmentationComposer([], cfg.image_size)\n", - " vec2box = Vec2Box(model, cfg.image_size, device)\n", - " post_proccess = PostProccess(vec2box, NMSConfig(0.5, 0.9))\n", + " converter = create_converter(cfg.model.name, model, cfg.model.anchor, cfg.image_size, device)\n", + " post_proccess = PostProccess(converter, NMSConfig(0.5, 0.9))\n", " " ] }, @@ -112,7 +122,7 @@ "with torch.no_grad():\n", " total_image, total_shift = slide_image(image)\n", " predict = model(total_image)\n", - " pred_class, _, pred_bbox = vec2box(predict[\"Main\"])\n", + " pred_class, _, pred_bbox = converter(predict[\"Main\"])\n", "pred_bbox[1:] = 
(pred_bbox[1: ] + total_shift[:, None]) / SLIDE\n", "pred_bbox = pred_bbox.view(1, -1, 4)\n", "pred_class = pred_class.view(1, -1, 80)\n", @@ -126,7 +136,7 @@ "metadata": {}, "outputs": [], "source": [ - "draw_bboxes(pil_image, predict_box, idx2label=cfg.class_list)" + "draw_bboxes(pil_image, predict_box, idx2label=cfg.dataset.class_list)" ] }, { diff --git a/examples/sample_inference.py b/examples/sample_inference.py index 2574515abafafacbc0e963abec92ecea54c32b3c..7b2b245273b72563cd132d062aa7111dfb8ba52d 100644 --- a/examples/sample_inference.py +++ b/examples/sample_inference.py @@ -2,29 +2,39 @@ import sys from pathlib import Path import hydra -import torch project_root = Path(__file__).resolve().parent.parent sys.path.append(str(project_root)) -from yolo.config.config import Config -from yolo.model.yolo import create_model -from yolo.tools.data_loader import create_dataloader -from yolo.tools.solver import ModelTester -from yolo.utils.logging_utils import custom_logger, validate_log_directory +from yolo import ( + Config, + FastModelLoader, + ModelTester, + ProgressLogger, + create_converter, + create_dataloader, + create_model, +) +from yolo.utils.model_utils import get_device -@hydra.main(config_path="../yolo/config", config_name="config", version_base=None) -def main(cfg: Config): - custom_logger() - save_path = validate_log_directory(cfg, cfg.name) - dataloader = create_dataloader(cfg) - - device = torch.device(cfg.device) - model = create_model(cfg).to(device) - tester = ModelTester(cfg, model, save_path, device) - tester.solve(dataloader) +@hydra.main(config_path="config", config_name="config", version_base=None) +def main(cfg: Config): + progress = ProgressLogger(cfg, exp_name=cfg.name) + device, use_ddp = get_device(cfg.device) + dataloader = create_dataloader(cfg.task.data, cfg.dataset, cfg.task.task, use_ddp) + if getattr(cfg.task, "fast_inference", False): + model = FastModelLoader(cfg).load_model(device) + else: + model = create_model(cfg.model, class_num=cfg.dataset.class_num, weight_path=cfg.weight) + model = model.to(device) + + converter = create_converter(cfg.model.name, model, cfg.model.anchor, cfg.image_size, device) + + solver = ModelTester(cfg, model, converter, progress, device) + progress.start() + solver.solve(dataloader) if __name__ == "__main__": diff --git a/examples/sample_train.py b/examples/sample_train.py index 2c8620958ca76334beba305bd1483e68af5243d1..dc917188a4a19e9577e43bae766c8bb11ab8d409 100644 --- a/examples/sample_train.py +++ b/examples/sample_train.py @@ -2,29 +2,35 @@ import sys from pathlib import Path import hydra -import torch project_root = Path(__file__).resolve().parent.parent sys.path.append(str(project_root)) -from yolo.config.config import Config -from yolo.model.yolo import create_model -from yolo.tools.data_loader import create_dataloader -from yolo.tools.solver import ModelTrainer -from yolo.utils.logging_utils import custom_logger, validate_log_directory +from yolo import ( + Config, + ModelTrainer, + ProgressLogger, + create_converter, + create_dataloader, + create_model, +) +from yolo.utils.model_utils import get_device -@hydra.main(config_path="../yolo/config", config_name="config", version_base=None) + +@hydra.main(config_path="config", config_name="config", version_base=None) def main(cfg: Config): - custom_logger() - save_path = validate_log_directory(cfg, cfg.name) - dataloader = create_dataloader(cfg) - # TODO: get_device or rank, for DDP mode - device = torch.device(cfg.device) - model = create_model(cfg).to(device) - - 
trainer = ModelTrainer(cfg, model, save_path, device) - trainer.solve(dataloader, cfg.task.epoch) + progress = ProgressLogger(cfg, exp_name=cfg.name) + device, use_ddp = get_device(cfg.device) + dataloader = create_dataloader(cfg.task.data, cfg.dataset, cfg.task.task, use_ddp) + model = create_model(cfg.model, class_num=cfg.dataset.class_num, weight_path=cfg.weight) + model = model.to(device) + + converter = create_converter(cfg.model.name, model, cfg.model.anchor, cfg.image_size, device) + + solver = ModelTrainer(cfg, model, converter, progress, device) + progress.start() + solver.solve(dataloader) if __name__ == "__main__": diff --git a/requirements-dev.txt b/requirements-dev.txt index fb7a8ad727d1fa06291125d2a41083eddd0897a5..966d9eb008980315da2997292b2552fa8419970b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,3 +3,4 @@ pytest pytest-cov pre-commit pycocotools +tensorboard diff --git a/tests/test_tools/test_data_loader.py b/tests/test_tools/test_data_loader.py index 789c87692cbecb5f4ffe3270b4244d1fe6120383..0cfb9c65b1a8db59159e6c5bef6744767c9cba97 100644 --- a/tests/test_tools/test_data_loader.py +++ b/tests/test_tools/test_data_loader.py @@ -66,4 +66,4 @@ def test_directory_stream_data_loader_frame(directory_stream_data_loader: Stream frame, rev_tensor, origin_frame = next(iter(directory_stream_data_loader)) assert frame.shape == (1, 3, 640, 640) assert rev_tensor.shape == (1, 5) - assert origin_frame.size == (480, 640) or origin_frame.size == (512, 640) + assert origin_frame.size != (640, 640) diff --git a/yolo/__init__.py b/yolo/__init__.py index 8641398f8fa217d7e16a7fe91b4fd4a9967e6fb8..900de97de0f6c871af2e3ec1c410b5c157112766 100644 --- a/yolo/__init__.py +++ b/yolo/__init__.py @@ -5,12 +5,13 @@ from yolo.tools.drawer import draw_bboxes from yolo.tools.solver import ModelTester, ModelTrainer, ModelValidator from yolo.utils.bounding_box_utils import Anc2Box, Vec2Box, bbox_nms, create_converter from yolo.utils.deploy_utils import FastModelLoader -from yolo.utils.logging_utils import custom_logger +from yolo.utils.logging_utils import ProgressLogger, custom_logger from yolo.utils.model_utils import PostProccess all = [ "create_model", "Config", + "ProgressLogger", "NMSConfig", "custom_logger", "validate_log_directory", diff --git a/yolo/config/config.py b/yolo/config/config.py index 762fee98bdd303af294d60db9ceeb98065535f51..100fc5287eef165fbea9418064eee93ab3cda135 100644 --- a/yolo/config/config.py +++ b/yolo/config/config.py @@ -45,6 +45,8 @@ class DownloadOptions: @dataclass class DatasetConfig: path: str + class_num: int + class_list: List[str] auto_download: Optional[DownloadOptions] @@ -142,9 +144,6 @@ class Config: device: Union[str, int, List[int]] cpu_num: int - class_num: int - class_list: List[str] - class_idx_id: List[int] image_size: List[int] out_path: str @@ -152,7 +151,7 @@ class Config: lucky_number: 10 use_wandb: bool - use_TensorBoard: bool + use_tensorboard: bool weight: Optional[str] diff --git a/yolo/config/dataset/coco.yaml b/yolo/config/dataset/coco.yaml index 628dd32ecd7fc198ace6afd2bf9962404e08b281..6de4fa68289b1c72d0cdbf60d370705bed4c3887 100644 --- a/yolo/config/dataset/coco.yaml +++ b/yolo/config/dataset/coco.yaml @@ -2,6 +2,9 @@ path: data/coco train: train2017 validation: val2017 +class_num: 80 +class_list: ['Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane', 'Bus', 'Train', 'Truck', 'Boat', 'Traffic light', 'Fire hydrant', 'Stop sign', 'Parking meter', 'Bench', 'Bird', 'Cat', 'Dog', 'Horse', 'Sheep', 'Cow', 'Elephant', 'Bear', 
'Zebra', 'Giraffe', 'Backpack', 'Umbrella', 'Handbag', 'Tie', 'Suitcase', 'Frisbee', 'Skis', 'Snowboard', 'Sports ball', 'Kite', 'Baseball bat', 'Baseball glove', 'Skateboard', 'Surfboard', 'Tennis racket', 'Bottle', 'Wine glass', 'Cup', 'Fork', 'Knife', 'Spoon', 'Bowl', 'Banana', 'Apple', 'Sandwich', 'Orange', 'Broccoli', 'Carrot', 'Hot dog', 'Pizza', 'Donut', 'Cake', 'Chair', 'Couch', 'Potted plant', 'Bed', 'Dining table', 'Toilet', 'Tv', 'Laptop', 'Mouse', 'Remote', 'Keyboard', 'Cell phone', 'Microwave', 'Oven', 'Toaster', 'Sink', 'Refrigerator', 'Book', 'Clock', 'Vase', 'Scissors', 'Teddy bear', 'Hair drier', 'Toothbrush'] + auto_download: images: base_url: http://images.cocodataset.org/zips/ diff --git a/yolo/config/dataset/dev.yaml b/yolo/config/dataset/dev.yaml index f5357bdfb9ce71b5d7c309ccacc4b9515318e09e..6f2584f8a04ff4fc1eb64b292be960c1ca14de70 100644 --- a/yolo/config/dataset/dev.yaml +++ b/yolo/config/dataset/dev.yaml @@ -2,4 +2,7 @@ path: data/dev train: train validation: val +class_num: 80 +class_list: ['Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane', 'Bus', 'Train', 'Truck', 'Boat', 'Traffic light', 'Fire hydrant', 'Stop sign', 'Parking meter', 'Bench', 'Bird', 'Cat', 'Dog', 'Horse', 'Sheep', 'Cow', 'Elephant', 'Bear', 'Zebra', 'Giraffe', 'Backpack', 'Umbrella', 'Handbag', 'Tie', 'Suitcase', 'Frisbee', 'Skis', 'Snowboard', 'Sports ball', 'Kite', 'Baseball bat', 'Baseball glove', 'Skateboard', 'Surfboard', 'Tennis racket', 'Bottle', 'Wine glass', 'Cup', 'Fork', 'Knife', 'Spoon', 'Bowl', 'Banana', 'Apple', 'Sandwich', 'Orange', 'Broccoli', 'Carrot', 'Hot dog', 'Pizza', 'Donut', 'Cake', 'Chair', 'Couch', 'Potted plant', 'Bed', 'Dining table', 'Toilet', 'Tv', 'Laptop', 'Mouse', 'Remote', 'Keyboard', 'Cell phone', 'Microwave', 'Oven', 'Toaster', 'Sink', 'Refrigerator', 'Book', 'Clock', 'Vase', 'Scissors', 'Teddy bear', 'Hair drier', 'Toothbrush'] + auto_download: diff --git a/yolo/config/dataset/mock.yaml b/yolo/config/dataset/mock.yaml index eedb7850db86ea2374b8a25124e162fdb07bc463..c7d58a10d090bad788f53bfccc6c6df50fc86454 100644 --- a/yolo/config/dataset/mock.yaml +++ b/yolo/config/dataset/mock.yaml @@ -2,6 +2,9 @@ path: tests/data train: train validation: val +class_num: 80 +class_list: ['Person', 'Bicycle', 'Car', 'Motorcycle', 'Airplane', 'Bus', 'Train', 'Truck', 'Boat', 'Traffic light', 'Fire hydrant', 'Stop sign', 'Parking meter', 'Bench', 'Bird', 'Cat', 'Dog', 'Horse', 'Sheep', 'Cow', 'Elephant', 'Bear', 'Zebra', 'Giraffe', 'Backpack', 'Umbrella', 'Handbag', 'Tie', 'Suitcase', 'Frisbee', 'Skis', 'Snowboard', 'Sports ball', 'Kite', 'Baseball bat', 'Baseball glove', 'Skateboard', 'Surfboard', 'Tennis racket', 'Bottle', 'Wine glass', 'Cup', 'Fork', 'Knife', 'Spoon', 'Bowl', 'Banana', 'Apple', 'Sandwich', 'Orange', 'Broccoli', 'Carrot', 'Hot dog', 'Pizza', 'Donut', 'Cake', 'Chair', 'Couch', 'Potted plant', 'Bed', 'Dining table', 'Toilet', 'Tv', 'Laptop', 'Mouse', 'Remote', 'Keyboard', 'Cell phone', 'Microwave', 'Oven', 'Toaster', 'Sink', 'Refrigerator', 'Book', 'Clock', 'Vase', 'Scissors', 'Teddy bear', 'Hair drier', 'Toothbrush'] + auto_download: images: base_url: https://github.com/WongKinYiu/yolov9mit/releases/download/v1.0-alpha/ diff --git a/yolo/config/general.yaml b/yolo/config/general.yaml index b88ede68f1d78fd46cc77b8bb96bc71ff885633d..38d3fbce235faa8db30c6448b7644d7b28e62095 100644 --- a/yolo/config/general.yaml +++ b/yolo/config/general.yaml @@ -1,8 +1,6 @@ device: 0 cpu_num: 16 -class_num: 80 -class_list: ['Person', 'Bicycle', 'Car', 'Motorcycle', 
'Airplane', 'Bus', 'Train', 'Truck', 'Boat', 'Traffic light', 'Fire hydrant', 'Stop sign', 'Parking meter', 'Bench', 'Bird', 'Cat', 'Dog', 'Horse', 'Sheep', 'Cow', 'Elephant', 'Bear', 'Zebra', 'Giraffe', 'Backpack', 'Umbrella', 'Handbag', 'Tie', 'Suitcase', 'Frisbee', 'Skis', 'Snowboard', 'Sports ball', 'Kite', 'Baseball bat', 'Baseball glove', 'Skateboard', 'Surfboard', 'Tennis racket', 'Bottle', 'Wine glass', 'Cup', 'Fork', 'Knife', 'Spoon', 'Bowl', 'Banana', 'Apple', 'Sandwich', 'Orange', 'Broccoli', 'Carrot', 'Hot dog', 'Pizza', 'Donut', 'Cake', 'Chair', 'Couch', 'Potted plant', 'Bed', 'Dining table', 'Toilet', 'Tv', 'Laptop', 'Mouse', 'Remote', 'Keyboard', 'Cell phone', 'Microwave', 'Oven', 'Toaster', 'Sink', 'Refrigerator', 'Book', 'Clock', 'Vase', 'Scissors', 'Teddy bear', 'Hair drier', 'Toothbrush'] image_size: [640, 640] out_path: runs @@ -10,6 +8,6 @@ exist_ok: True lucky_number: 10 use_wandb: False -use_TensorBoard: False +use_tensorboard: False weight: True # Path to weight or True for auto, False for no pretrained weight diff --git a/yolo/config/task/train.yaml b/yolo/config/task/train.yaml index 85cea59731ad4012f16f43b90570db38be68757e..d3eab6cf0d8c53ebf9aafb26d6dd215a7a8520bf 100644 --- a/yolo/config/task/train.yaml +++ b/yolo/config/task/train.yaml @@ -14,7 +14,9 @@ data: data_augment: Mosaic: 1 # MixUp: 1 - HorizontalFlip: 0.5 + # HorizontalFlip: 0.5 + RandomCrop: 1 + RemoveOutliers: 1e-8 optimizer: type: SGD diff --git a/yolo/lazy.py b/yolo/lazy.py index adede672c5f87377da1a2f5910d58a0387a19c7e..1bc5577ed36a5277df74953d91d80ffc4d80e3fa 100644 --- a/yolo/lazy.py +++ b/yolo/lazy.py @@ -24,7 +24,7 @@ def main(cfg: Config): if getattr(cfg.task, "fast_inference", False): model = FastModelLoader(cfg).load_model(device) else: - model = create_model(cfg.model, class_num=cfg.class_num, weight_path=cfg.weight) + model = create_model(cfg.model, class_num=cfg.dataset.class_num, weight_path=cfg.weight) model = model.to(device) converter = create_converter(cfg.model.name, model, cfg.model.anchor, cfg.image_size, device) diff --git a/yolo/model/module.py b/yolo/model/module.py index a1ae1f5b99ac9348f37533bcdba32435b64fcc67..e4b778e3968a26f562e00c85e33298f154d986fb 100644 --- a/yolo/model/module.py +++ b/yolo/model/module.py @@ -198,7 +198,7 @@ class RepConv(nn.Module): return self.act(self.conv1(x) + self.conv2(x)) -class RepNBottleneck(nn.Module): +class Bottleneck(nn.Module): """A bottleneck block with optional residual connections.""" def __init__( @@ -250,7 +250,7 @@ class RepNCSP(nn.Module): self.conv3 = Conv(2 * neck_channels, out_channels, kernel_size, **kwargs) self.bottleneck = nn.Sequential( - *[RepNBottleneck(neck_channels, neck_channels, **neck_args) for _ in range(repeat_num)] + *[Bottleneck(neck_channels, neck_channels, **neck_args) for _ in range(repeat_num)] ) def forward(self, x: torch.Tensor) -> torch.Tensor: diff --git a/yolo/model/yolo.py b/yolo/model/yolo.py index 51ee759433849161a6143d5a1e9f9be24363fbc2..c7b98acd2753cd7e80a8cba6fe65c3f56edbc6b5 100644 --- a/yolo/model/yolo.py +++ b/yolo/model/yolo.py @@ -1,3 +1,4 @@ +from collections import OrderedDict from pathlib import Path from typing import Dict, List, Union @@ -114,6 +115,39 @@ class YOLO(nn.Module): else: raise ValueError(f"Unsupported layer type: {layer_type}") + def save_load_weights(self, weights: Union[Path, OrderedDict]): + """ + Update the model's weights with the provided state dictionary. + + Args: + weights: An OrderedDict of new weights, or a Path to a weight file to load them from.
+ """ + if isinstance(weights, Path): + weights = torch.load(weights, map_location=torch.device("cpu")) + if "model_state_dict" in weights: + weights = weights["model_state_dict"] + + model_state_dict = self.model.state_dict() + + # TODO1: autoload old version weight + # TODO2: weight transform if num_class difference + + error_dict = {"Mismatch": set(), "Not Found": set()} + for model_key, model_weight in model_state_dict.items(): + if model_key not in weights: + error_dict["Not Found"].add(tuple(model_key.split(".")[:-2])) + continue + if model_weight.shape != weights[model_key].shape: + error_dict["Mismatch"].add(tuple(model_key.split(".")[:-2])) + continue + model_state_dict[model_key] = weights[model_key] + + for error_name, error_set in error_dict.items(): + for weight_name in error_set: + logger.warning(f"⚠️ Weight {error_name} for key: {'.'.join(weight_name)}") + + self.model.load_state_dict(model_state_dict) + def create_model(model_cfg: ModelConfig, weight_path: Union[bool, Path] = True, class_num: int = 80) -> YOLO: """Constructs and returns a model from a Dictionary configuration file. @@ -129,11 +163,14 @@ def create_model(model_cfg: ModelConfig, weight_path: Union[bool, Path] = True, if weight_path: if weight_path == True: weight_path = Path("weights") / f"{model_cfg.name}.pt" + elif isinstance(weight_path, str): + weight_path = Path(weight_path) + if not weight_path.exists(): logger.info(f"🌐 Weight {weight_path} not found, try downloading") prepare_weight(weight_path=weight_path) if weight_path.exists(): - model.model.load_state_dict(torch.load(weight_path, map_location=torch.device("cpu")), strict=False) + model.save_load_weights(weight_path) logger.info("✅ Success load model & weight") else: logger.info("✅ Success load model") diff --git a/yolo/tools/data_augmentation.py b/yolo/tools/data_augmentation.py index 0887c9034848670951856af091036f1e872fbd14..4751b22fc14c53f1ece11be40503b7d49d41bba0 100644 --- a/yolo/tools/data_augmentation.py +++ b/yolo/tools/data_augmentation.py @@ -25,7 +25,30 @@ class AugmentationComposer: return image, boxes, rev_tensor -# TODO: RandomCrop, Resize, ... etc. +class RemoveOutliers: + """Removes outlier bounding boxes that are too small or have invalid dimensions.""" + + def __init__(self, min_box_area=1e-8): + """ + Args: + min_box_area (float): Minimum area for a box to be kept, as a fraction of the image area. + """ + self.min_box_area = min_box_area + + def __call__(self, image, boxes): + """ + Args: + image (PIL.Image): The cropped image. + boxes (torch.Tensor): Bounding boxes in normalized coordinates (x_min, y_min, x_max, y_max). + Returns: + PIL.Image: The input image (unchanged). + torch.Tensor: Filtered bounding boxes. + """ + box_areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 4] - boxes[:, 2]) + + valid_boxes = (box_areas > self.min_box_area) & (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 4] > boxes[:, 2]) + + return image, boxes[valid_boxes] class PadAndResize: @@ -155,3 +178,34 @@ class MixUp: mixed_boxes = torch.cat([lam * boxes, (1 - lam) * boxes2]) return TF.to_pil_image(mixed_image), mixed_boxes + + +class RandomCrop: + """Randomly crops the image to half its size along with adjusting the bounding boxes.""" + + def __init__(self, prob=0.5): + """ + Args: + prob (float): Probability of applying the crop. 
+ """ + self.prob = prob + + def __call__(self, image, boxes): + if torch.rand(1) < self.prob: + original_width, original_height = image.size + crop_height, crop_width = original_height // 2, original_width // 2 + top = torch.randint(0, original_height - crop_height + 1, (1,)).item() + left = torch.randint(0, original_width - crop_width + 1, (1,)).item() + + image = TF.crop(image, top, left, crop_height, crop_width) + + boxes[:, [1, 3]] = boxes[:, [1, 3]] * original_width - left + boxes[:, [2, 4]] = boxes[:, [2, 4]] * original_height - top + + boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0, crop_width) + boxes[:, [2, 4]] = boxes[:, [2, 4]].clamp(0, crop_height) + + boxes[:, [1, 3]] /= crop_width + boxes[:, [2, 4]] /= crop_height + + return image, boxes diff --git a/yolo/tools/data_loader.py b/yolo/tools/data_loader.py index c3555117377b2704ae7756f5a9b3bf596dea1b60..9ceb455f1a0279c2d519eeb93ea7825c18f28741 100644 --- a/yolo/tools/data_loader.py +++ b/yolo/tools/data_loader.py @@ -13,13 +13,8 @@ from torch.utils.data import DataLoader, Dataset from torch.utils.data.distributed import DistributedSampler from yolo.config.config import DataConfig, DatasetConfig -from yolo.tools.data_augmentation import ( - AugmentationComposer, - HorizontalFlip, - MixUp, - Mosaic, - VerticalFlip, -) +from yolo.tools.data_augmentation import * +from yolo.tools.data_augmentation import AugmentationComposer from yolo.tools.dataset_preparation import prepare_dataset from yolo.utils.dataset_utils import ( create_image_metadata, diff --git a/yolo/tools/drawer.py b/yolo/tools/drawer.py index ab099d5acf45c67b97f32761daa686d0f360fa3e..4f0b220ee8ae7192c852e2f919835e17d0f3854d 100644 --- a/yolo/tools/drawer.py +++ b/yolo/tools/drawer.py @@ -32,7 +32,10 @@ def draw_bboxes( img = img[0] img = to_pil_image(img) - img, bboxes = img.copy(), bboxes[0] + if isinstance(bboxes, list) or bboxes.ndim == 3: + bboxes = bboxes[0] + + img = img.copy() label_size = img.size[1] / 30 draw = ImageDraw.Draw(img, "RGBA") @@ -43,6 +46,8 @@ def draw_bboxes( for bbox in bboxes: class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox] + x_min, x_max = min(x_min, x_max), max(x_min, x_max) + y_min, y_max = min(y_min, y_max), max(y_min, y_max) bbox = [(x_min, y_min), (x_max, y_max)] random.seed(int(class_id)) diff --git a/yolo/tools/loss_functions.py b/yolo/tools/loss_functions.py index b4b5e9c3d576c7544f9b67a2957e7bf064ce6f62..f426c972f3da3cf17a1bfcdf7e4e9d7a3c5c190e 100644 --- a/yolo/tools/loss_functions.py +++ b/yolo/tools/loss_functions.py @@ -109,7 +109,7 @@ class YOLOLoss: class DualLoss: def __init__(self, cfg: Config, vec2box) -> None: loss_cfg = cfg.task.loss - self.loss = YOLOLoss(loss_cfg, vec2box, class_num=cfg.class_num, reg_max=cfg.model.anchor.reg_max) + self.loss = YOLOLoss(loss_cfg, vec2box, class_num=cfg.dataset.class_num, reg_max=cfg.model.anchor.reg_max) self.aux_rate = loss_cfg.aux diff --git a/yolo/tools/solver.py b/yolo/tools/solver.py index d21f5bcacb8fbc9d094a001727f252f93d02aaa8..1440a640204e339679d1a3172da2c4b80f553363 100644 --- a/yolo/tools/solver.py +++ b/yolo/tools/solver.py @@ -162,7 +162,7 @@ class ModelTester: self.save_path = progress.save_path / "images" os.makedirs(self.save_path, exist_ok=True) self.save_predict = getattr(cfg.task, "save_predict", None) - self.idx2label = cfg.class_list + self.idx2label = cfg.dataset.class_list def solve(self, dataloader: StreamDataLoader): logger.info("👀 Start Inference!") @@ -231,7 +231,7 @@ class ModelValidator: if json_path: self.coco_gt = COCO(json_path) - 
def solve(self, dataloader, epoch_idx=-1): +     def solve(self, dataloader, epoch_idx=1): # logger.info("🧪 Start Validation!") self.model.eval() predict_json, mAPs = [], defaultdict(list) @@ -251,6 +251,7 @@ class ModelValidator: predict_json.extend(predicts_to_json(img_paths, predicts, rev_tensor)) self.progress.finish_one_epoch(avg_mAPs, epoch_idx=epoch_idx) + self.progress.visualize_image(images, targets, predicts, epoch_idx=epoch_idx) with open(self.json_path, "w") as f: json.dump(predict_json, f) diff --git a/yolo/utils/bounding_box_utils.py b/yolo/utils/bounding_box_utils.py index 47c1636a695a560eb0317f8831375386392f5aa5..c53c47af40ba2cc148332016fef45381d96a66b5 100644 --- a/yolo/utils/bounding_box_utils.py +++ b/yolo/utils/bounding_box_utils.py @@ -217,6 +217,7 @@ class BoxMatcher: Returns: unique_indices [batch x anchors x 1]: The index of the best targets for each anchors """ + # TODO: add an assert for images with no targets unique_indices = target_matrix.argmax(dim=1) return unique_indices[..., None] diff --git a/yolo/utils/dataset_utils.py b/yolo/utils/dataset_utils.py index be906975fb67c98f72f288b6c0628da2dbf26083..a6c6e1fdafe4e1fb95068a7f692d742ca7033ecf 100644 --- a/yolo/utils/dataset_utils.py +++ b/yolo/utils/dataset_utils.py @@ -100,7 +100,10 @@ def scale_segmentation( h, w = image_dimensions["height"], image_dimensions["width"] for anno in annotations: category_id = anno["category_id"] - seg_list = [item for sublist in anno["segmentation"] for item in sublist] + if "segmentation" in anno: + seg_list = [item for sublist in anno["segmentation"] for item in sublist] + elif "bbox" in anno: + seg_list = anno["bbox"] scaled_seg_data = ( np.array(seg_list).reshape(-1, 2) / [w, h] ).tolist() # make the list group in x, y pairs and scaled with image width, height diff --git a/yolo/utils/deploy_utils.py b/yolo/utils/deploy_utils.py index 8d6e53192dda175e82847f3e6b7f1ea91df9a98d..9ca709b016ee10a470b8cfcd6cd9be14fc3b48d9 100644 --- a/yolo/utils/deploy_utils.py +++ b/yolo/utils/deploy_utils.py @@ -12,6 +12,8 @@ class FastModelLoader: def __init__(self, cfg: Config): self.cfg = cfg self.compiler = cfg.task.fast_inference + self.class_num = cfg.dataset.class_num + self._validate_compiler() if cfg.weight == True: cfg.weight = Path("weights") / f"{cfg.model.name}.pt" @@ -32,7 +34,7 @@ class FastModelLoader: return self._load_trt_model().to(device) elif self.compiler == "deploy": self.cfg.model.model.auxiliary = {} - return create_model(self.cfg.model, class_num=self.cfg.class_num, weight_path=self.cfg.weight).to(device) + return create_model(self.cfg.model, class_num=self.class_num, weight_path=self.cfg.weight).to(device) def _load_onnx_model(self, device): from onnxruntime import InferenceSession @@ -67,7 +69,7 @@ class FastModelLoader: from onnxruntime import InferenceSession from torch.onnx import export - model = create_model(self.cfg.model, class_num=self.cfg.class_num, weight_path=self.cfg.weight).eval() + model = create_model(self.cfg.model, class_num=self.class_num, weight_path=self.cfg.weight).eval() dummy_input = torch.ones((1, 3, *self.cfg.image_size)) export( model, @@ -95,7 +97,7 @@ class FastModelLoader: def _create_trt_model(self): from torch2trt import torch2trt - model = create_model(self.cfg.model, class_num=self.cfg.class_num, weight_path=self.cfg.weight).eval() + model = create_model(self.cfg.model, class_num=self.class_num, weight_path=self.cfg.weight).eval() dummy_input = torch.ones((1, 3, *self.cfg.image_size)).cuda() logger.info(f"♻️ Creating TensorRT model") model_trt
= torch2trt(model.cuda(), [dummy_input]) diff --git a/yolo/utils/logging_utils.py b/yolo/utils/logging_utils.py index 90d790b74547d882daa7acee16ee34a34c9a734a..fceded327b5631c81b51ba736584609819fe07c8 100644 --- a/yolo/utils/logging_utils.py +++ b/yolo/utils/logging_utils.py @@ -16,7 +16,7 @@ import random import sys from collections import deque from pathlib import Path -from typing import Any, Dict, Union +from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np import torch @@ -36,9 +36,11 @@ from rich.table import Table from torch import Tensor from torch.nn import ModuleList from torch.optim import Optimizer +from torchvision.transforms.functional import pil_to_tensor from yolo.config.config import Config, YOLOLayer from yolo.model.yolo import YOLO +from yolo.tools.drawer import draw_bboxes from yolo.utils.solver_utils import make_ap_table @@ -93,6 +95,13 @@ class ProgressLogger(Progress): project="YOLO", resume="allow", mode="online", dir=self.save_path, id=None, name=exp_name ) + self.use_tensorboard = cfg.use_tensorboard + if self.use_tensorboard: + from torch.utils.tensorboard import SummaryWriter + + self.tb_writer = SummaryWriter(log_dir=self.save_path / "tensorboard") + logger.opt(colors=True).info(f"📍 Enable TensorBoard locally at http://localhost:6006") + def rank_check(logging_function): def wrapper(self, *args, **kwargs): if getattr(self, "local_rank", 0) != 0: @@ -118,11 +127,17 @@ class ProgressLogger(Progress): if hasattr(self, "task_epoch"): self.update(self.task_epoch, description=f"[cyan] Preparing Data") - if self.use_wandb and optimizer is not None: + if optimizer is not None: lr_values = [params["lr"] for params in optimizer.param_groups] lr_names = ["Learning Rate/bias", "Learning Rate/norm", "Learning Rate/conv"] - for lr_name, lr_value in zip(lr_names, lr_values): - self.wandb.log({lr_name: lr_value}, step=epoch_idx) + if self.use_wandb: + for lr_name, lr_value in zip(lr_names, lr_values): + self.wandb.log({lr_name: lr_value}, step=epoch_idx) + + if self.use_tensorboard: + for lr_name, lr_value in zip(lr_names, lr_values): + self.tb_writer.add_scalar(lr_name, lr_value, global_step=epoch_idx) + self.batch_task = self.add_task(f"[green] Phase: {task}", total=num_batches) @rank_check @@ -141,14 +156,62 @@ class ProgressLogger(Progress): @rank_check def finish_one_epoch(self, batch_info: Dict[str, Any] = None, epoch_idx: int = -1): if self.task == "Train": - prefix = "Loss/" + prefix = "Loss" elif self.task == "Validate": - prefix = "Metrics/" - batch_info = {f"{prefix}{key}": value for key, value in batch_info.items()} + prefix = "Metrics" + batch_info = {f"{prefix}/{key}": value for key, value in batch_info.items()} if self.use_wandb: self.wandb.log(batch_info, step=epoch_idx) + if self.use_tensorboard: + for key, value in batch_info.items(): + self.tb_writer.add_scalar(key, value, epoch_idx) + self.remove_task(self.batch_task) + @rank_check + def visualize_image( + self, + images: Optional[Tensor] = None, + ground_truth: Optional[Tensor] = None, + prediction: Optional[Union[List[Tensor], Tensor]] = None, + epoch_idx: int = 0, + ) -> None: + """ + Upload the ground truth bounding boxes, predicted bounding boxes, and the original image to wandb or TensorBoard. + + Args: + images (Optional[Tensor]): Tensor of images with shape (BZ, 3, 640, 640). + ground_truth (Optional[Tensor]): Ground truth bounding boxes with shape (BZ, N, 5) or (N, 5). Defaults to None. 
+ prediction (Optional[Union[List[Tensor], Tensor]]): Predicted bounding boxes with shape (N, 6), or a list of such tensors (one per image). Defaults to None. + epoch_idx (int): Current epoch index. Defaults to 0. + """ + if images is not None: + images = images[0] if images.ndim == 4 else images + if self.use_wandb: + wandb.log({"Input Image": wandb.Image(images)}, step=epoch_idx) + if self.use_tensorboard: + self.tb_writer.add_image("Media/Input Image", images, 1) + + if ground_truth is not None: + gt_boxes = ground_truth[0] if ground_truth.ndim == 3 else ground_truth + if self.use_wandb: + wandb.log( + {"Ground Truth": wandb.Image(images, boxes={"predictions": {"box_data": log_bbox(gt_boxes)}})}, + step=epoch_idx, + ) + if self.use_tensorboard: + self.tb_writer.add_image("Media/Ground Truth", pil_to_tensor(draw_bboxes(images, gt_boxes)), epoch_idx) + + if prediction is not None: + pred_boxes = prediction[0] if isinstance(prediction, list) else prediction + if self.use_wandb: + wandb.log( + {"Prediction": wandb.Image(images, boxes={"predictions": {"box_data": log_bbox(pred_boxes)}})}, + step=epoch_idx, + ) + if self.use_tensorboard: + self.tb_writer.add_image("Media/Prediction", pil_to_tensor(draw_bboxes(images, pred_boxes)), epoch_idx) + @rank_check def start_pycocotools(self): self.batch_task = self.add_task("[green]Run pycocotools", total=1) @@ -162,6 +225,11 @@ class ProgressLogger(Progress): if self.use_wandb: self.wandb.log({"PyCOCO/AP @ .5:.95": ap_main[2], "PyCOCO/AP @ .5": ap_main[5]}) + if self.use_tensorboard: + # TODO: waiting torch bugs fix, https://github.com/pytorch/pytorch/issues/32651 + self.tb_writer.add_scalar("PyCOCO/AP @ .5:.95", ap_main[2], epoch_idx) + self.tb_writer.add_scalar("PyCOCO/AP @ .5", ap_main[5], epoch_idx) + self.update(self.batch_task, advance=1) self.refresh() self.remove_task(self.batch_task) @@ -172,6 +240,8 @@ class ProgressLogger(Progress): self.stop() if self.use_wandb: self.wandb.finish() + if self.use_tensorboard: + self.tb_writer.close() def custom_wandb_log(string="", level=int, newline=True, repeat=True, prefix=True, silent=False): @@ -228,3 +298,37 @@ def validate_log_directory(cfg: Config, exp_name: str) -> Path: logger.opt(colors=True).info(f"📄 Created log folder: {save_path}") logger.add(save_path / "output.log", mode="w", backtrace=True, diagnose=True) return save_path + + +def log_bbox( + bboxes: Tensor, class_list: Optional[List[str]] = None, image_size: Tuple[int, int] = (640, 640) +) -> List[dict]: + """ + Convert a bounding box tensor to a list of dictionaries for logging, normalized by the image size. + + Args: + bboxes (Tensor): Bounding boxes with shape (N, 5) or (N, 6), where each box is [class_id, x_min, y_min, x_max, y_max, (confidence)]. + class_list (Optional[List[str]]): List of class names. Defaults to None. + image_size (Tuple[int, int]): The size of the image, used for normalization. Defaults to (640, 640). + + Returns: + List[dict]: List of dictionaries containing normalized bounding box information.
+ """ + bbox_list = [] + scale_tensor = torch.Tensor([1, *image_size, *image_size]).to(bboxes.device) + normalized_bboxes = bboxes[:, :5] / scale_tensor + for bbox in normalized_bboxes: + class_id, x_min, y_min, x_max, y_max, *conf = [float(val) for val in bbox] + if class_id == -1: + break + bbox_entry = { + "position": {"minX": x_min, "maxX": x_max, "minY": y_min, "maxY": y_max}, + "class_id": int(class_id), + } + if class_list: + bbox_entry["box_caption"] = class_list[int(class_id)] + if conf: + bbox_entry["scores"] = {"confidence": conf[0]} + bbox_list.append(bbox_entry) + + return bbox_list