{
  "architectures": [
    "PS3Model"
  ],
  "model_type": "ps3",
  "vision_config": {
    "architectures": [
      "PS3VisionModel"
    ],
    "model_type": "ps3_vision_model",
    "model_name": "vit_so400m_patch14_siglip_384",
    "hidden_size": 1152,
    "pool": "map",
    "ps3_scales": [
      378,
      756,
      1512,
      3780
    ],
    "select_based_on_layer": [
      0,
      9,
      18,
      26
    ],
    "min_select_num": 1,
    "max_select_num": 2560,
    "seperate_pos_emb": true,
    "highres_selection_feature": true,
    "radio": false,
    "radio_adapter_mlp_version": null,
    "radio_adapter_mlp_input_dim": null,
    "radio_adapter_mlp_hidden_dim": null,
    "radio_adapter_mlp_output_dim": null,
    "radio_adapter_mlp_num_inner": null,
    "img_size": null,
    "drop": 0.0,
    "class_token": null,
    "final_norm": false
  },
  "text_config": {
    "context_length": 64,
    "vocab_size": 32000,
    "hf_tokenizer_name": "timm/ViT-B-16-SigLIP",
    "tokenizer_kwargs": {
      "clean": "canonicalize"
    },
    "width": 1152,
    "heads": 16,
    "layers": 27,
    "mlp_ratio": 3.7362,
    "no_causal_mask": true,
    "proj_bias": true,
    "pool_type": "last",
    "norm_kwargs": {
      "eps": 1e-06
    },
    "architectures": [
      "PS3TextModel"
    ],
    "model_type": "ps3_text_model",
    "output_dim": 1152,
    "prompt_proj_dim": 1152
  }
}