{ "architecture": "shvit_s3", "num_classes": 1000, "num_features": 448, "pretrained_cfg": { "tag": "in1k", "custom_load": false, "input_size": [ 3, 224, 224 ], "fixed_input_size": false, "interpolation": "bicubic", "crop_pct": 0.875, "crop_mode": "center", "mean": [ 0.485, 0.456, 0.406 ], "std": [ 0.229, 0.224, 0.225 ], "num_classes": 1000, "pool_size": [ 4, 4 ], "first_conv": "patch_embed.0.c", "classifier": "head.l", "origin_url": "https://github.com/ysj9909/SHViT", "paper_name": "SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design", "paper_ids": "arXiv:2401.16456" } }