PS3-4K-SigLIP / config.json
bfshi-nvidia's picture
Upload folder using huggingface_hub
557d006 verified
{
"architectures": [
"PS3Model"
],
"model_type": "ps3",
"vision_config": {
"architectures": [
"PS3VisionModel"
],
"model_type": "ps3_vision_model",
"model_name": "vit_so400m_patch14_siglip_384",
"hidden_size": 1152,
"pool": "map",
"ps3_scales": [
378,
756,
1512,
3780
],
"select_based_on_layer": [
0,
9,
18,
26
],
"min_select_num": 1,
"max_select_num": 2560,
"seperate_pos_emb": true,
"highres_selection_feature": true,
"radio": false,
"radio_adapter_mlp_version": null,
"radio_adapter_mlp_input_dim": null,
"radio_adapter_mlp_hidden_dim": null,
"radio_adapter_mlp_output_dim": null,
"radio_adapter_mlp_num_inner": null,
"img_size": null,
"drop": 0.0,
"class_token": null,
"final_norm": false
},
"text_config": {
"context_length": 64,
"vocab_size": 32000,
"hf_tokenizer_name": "timm/ViT-B-16-SigLIP",
"tokenizer_kwargs": {
"clean": "canonicalize"
},
"width": 1152,
"heads": 16,
"layers": 27,
"mlp_ratio": 3.7362,
"no_causal_mask": true,
"proj_bias": true,
"pool_type": "last",
"norm_kwargs": {
"eps": 1e-06
},
"architectures": [
"PS3TextModel"
],
"model_type": "ps3_text_model",
"output_dim": 1152,
"prompt_proj_dim": 1152
}
}