bfshi-nvidia committed on
Commit
230603b
·
verified ·
1 Parent(s): bc676a5

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "PS3Model"
4
+ ],
5
+ "model_type": "ps3",
6
+ "vision_config": {
7
+ "architectures": [
8
+ "PS3VisionModel"
9
+ ],
10
+ "model_type": "ps3_vision_model",
11
+ "model_name": "vit_so400m_patch14_siglip_384",
12
+ "hidden_size": 1152,
13
+ "pool": "map",
14
+ "s3_scales": [
15
+ 378,
16
+ 756,
17
+ 1512,
18
+ 3780
19
+ ],
20
+ "select_based_on_layer": [
21
+ 0,
22
+ 9,
23
+ 18,
24
+ 26
25
+ ],
26
+ "min_select_num": 1,
27
+ "max_select_num": 2560,
28
+ "seperate_pos_emb": true,
29
+ "highres_selection_feature": true,
30
+ "radio": false,
31
+ "radio_adapter_mlp_version": null,
32
+ "radio_adapter_mlp_input_dim": null,
33
+ "radio_adapter_mlp_hidden_dim": null,
34
+ "radio_adapter_mlp_output_dim": null,
35
+ "radio_adapter_mlp_num_inner": null,
36
+ "img_size": null,
37
+ "drop": 0.0,
38
+ "class_token": null,
39
+ "final_norm": false
40
+ },
41
+ "text_config": {
42
+ "context_length": 64,
43
+ "vocab_size": 32000,
44
+ "hf_tokenizer_name": "timm/ViT-B-16-SigLIP",
45
+ "tokenizer_kwargs": {
46
+ "clean": "canonicalize"
47
+ },
48
+ "width": 1152,
49
+ "heads": 16,
50
+ "layers": 27,
51
+ "mlp_ratio": 3.7362,
52
+ "no_causal_mask": true,
53
+ "proj_bias": true,
54
+ "pool_type": "last",
55
+ "norm_kwargs": {
56
+ "eps": 1e-06
57
+ },
58
+ "architectures": [
59
+ "PS3TextModel"
60
+ ],
61
+ "model_type": "ps3_text_model",
62
+ "output_dim": 1152,
63
+ "prompt_proj_dim": 1152
64
+ }
65
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6394d69e47b2ece97130d604b11c19198ccff9d4f248d9a96c1866b2db643b4
3
+ size 4112866144
preprocessor_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_size": [
3
+ 3780,
4
+ 3780
5
+ ],
6
+ "mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "interpolation": "bicubic",
17
+ "resize_mode": "squash"
18
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_name": "timm/ViT-B-16-SigLIP",
3
+ "context_length": 64,
4
+ "clean": "canonicalize"
5
+ }