# Model definition "v9-c": anchor settings plus the ordered layer lists
# (backbone, neck, head, detection, auxiliary) that make up the network.
#
# Each list item is a single-key mapping: the key names the layer type and
# the nested mapping configures it. Keys used in this file:
#   args   - settings for that layer type.
#   tags   - a label for the layer; later entries refer to it by name in
#            their `source` (e.g. B4 is consumed by a Concat in the neck).
#   source - a tag, an integer, or a list of them.
#            NOTE(review): -1 presumably means "the previous layer", 0 the
#            network input, and an omitted source the previous layer -
#            confirm against the model builder.
#   output - set to true on both MultiheadDetection entries.
#            NOTE(review): presumably marks the layer as a model output.
#
# NOTE(review): backbone/neck/head/detection/auxiliary are nested under
# `model`; confirm this matches the schema expected by the loader.
name: v9-c

anchor:
  reg_max: 16
  # Three entries, the same count as the feature maps passed to each
  # MultiheadDetection below.
  strides: [8, 16, 32]

model:
  # Two stride-2 Conv layers, then RepNCSPELAN stages separated by ADown.
  # B3 and B4 are reused by the neck; B3, B4 and B5 by the auxiliary branch.
  backbone:
    - Conv:
        args: {out_channels: 64, kernel_size: 3, stride: 2}
        source: 0
    - Conv:
        args: {out_channels: 128, kernel_size: 3, stride: 2}
    - RepNCSPELAN:
        args: {out_channels: 256, part_channels: 128}

    - ADown:
        args: {out_channels: 256}
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 256}
        tags: B3

    - ADown:
        args: {out_channels: 512}
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: B4

    - ADown:
        args: {out_channels: 512}
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: B5

  # SPPELAN (N3), then two UpSample + Concat steps that merge in B4 and B3.
  # N3 and N4 are reused by the head.
  neck:
    - SPPELAN:
        args: {out_channels: 512}
        tags: N3

    - UpSample:
        args: {scale_factor: 2, mode: nearest}
    - Concat:
        source: [-1, B4]
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: N4

    - UpSample:
        args: {scale_factor: 2, mode: nearest}
    - Concat:
        source: [-1, B3]

  # Produces the P3/P4/P5 layers consumed by the Main detection entry;
  # the ADown + Concat steps merge in N4 and N3.
  head:
    - RepNCSPELAN:
        args: {out_channels: 256, part_channels: 256}
        tags: P3

    - ADown:
        args: {out_channels: 256}
    - Concat:
        source: [-1, N4]
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: P4

    - ADown:
        args: {out_channels: 512}
    - Concat:
        source: [-1, N3]
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: P5

  detection:
    - MultiheadDetection:
        source: [P3, P4, P5]
        tags: Main
        output: true

  # CBLinear layers on B3/B4/B5 produce R3/R4/R5. A second
  # Conv/RepNCSPELAN/ADown stack, starting again from source 0, merges them
  # through CBFuse and yields A3/A4/A5 for the AUX detection entry.
  # NOTE(review): each CBFuse `index` list has one entry per R* source;
  # presumably it selects which CBLinear output to fuse - confirm.
  auxiliary:
    - CBLinear:
        source: B3
        args: {out_channels: [256]}
        tags: R3
    - CBLinear:
        source: B4
        args: {out_channels: [256, 512]}
        tags: R4
    - CBLinear:
        source: B5
        args: {out_channels: [256, 512, 512]}
        tags: R5

    - Conv:
        args: {out_channels: 64, kernel_size: 3, stride: 2}
        source: 0
    - Conv:
        args: {out_channels: 128, kernel_size: 3, stride: 2}
    - RepNCSPELAN:
        args: {out_channels: 256, part_channels: 128}

    - ADown:
        args: {out_channels: 256}
    - CBFuse:
        source: [R3, R4, R5, -1]
        args: {index: [0, 0, 0]}
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 256}
        tags: A3

    - ADown:
        args: {out_channels: 512}
    - CBFuse:
        source: [R4, R5, -1]
        args: {index: [1, 1]}
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: A4

    - ADown:
        args: {out_channels: 512}
    - CBFuse:
        source: [R5, -1]
        args: {index: [2]}
    - RepNCSPELAN:
        args: {out_channels: 512, part_channels: 512}
        tags: A5

    - MultiheadDetection:
        source: [A3, A4, A5]
        tags: AUX
        output: true