Kikinoking
/

MNLP_M2_quantized_model

@@ -5,31 +5,10 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
-  "config_groups": {
-    "group_0": {
-      "input_activations": {
-        "num_bits": 8,
-        "strategy": "tensor",
-        "type": "int"
-      },
-      "targets": [
-        "Linear"
-      ],
-      "weights": {
-        "num_bits": 8,
-        "strategy": "tensor",
-        "type": "int"
-      }
-    }
-  },
   "eos_token_id": 151643,
-  "format": "naive-quantized",
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 1024,
-  "ignore": [
-    "lm_head"
-  ],
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "max_position_embeddings": 32768,
@@ -38,14 +17,55 @@
   "num_attention_heads": 16,
   "num_hidden_layers": 28,
   "num_key_value_heads": 8,
-  "quant_method": "compressed-tensors",
-  "quantization_status": "frozen",
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "float32",
   "transformers_version": "4.51.3",
   "use_cache": true,
   "use_sliding_window": false,

   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151643,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "max_position_embeddings": 32768,
   "num_attention_heads": 16,
   "num_hidden_layers": 28,
   "num_key_value_heads": 8,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "int"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "int"
+        }
+      }
+    },
+    "format": "naive-quantized",
+    "global_compression_ratio": null,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed",
+    "sparsity_config": {}
+  },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000,
   "sliding_window": null,
   "tie_word_embeddings": true,
+  "torch_dtype": "float16",
   "transformers_version": "4.51.3",
   "use_cache": true,
   "use_sliding_window": false,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2dc66f94b88125434979932c9794cd8d5f946b32a03cc7b2c6faabcbfa8459ef
-size 2384234968

 version https://git-lfs.github.com/spec/v1
+oid sha256:f989386eb1d54b4e8c90164597f5ed23e01318172950950e7c28bf5ccd072a4b
+size 751776664