awni committed on
Commit
a33164c
·
verified ·
1 Parent(s): abb1a48

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. config.json +66 -0
  2. generation_config.json +9 -0
  3. model-00001-of-00088.safetensors +3 -0
  4. model-00002-of-00088.safetensors +3 -0
  5. model-00003-of-00088.safetensors +3 -0
  6. model-00004-of-00088.safetensors +3 -0
  7. model-00005-of-00088.safetensors +3 -0
  8. model-00006-of-00088.safetensors +3 -0
  9. model-00007-of-00088.safetensors +3 -0
  10. model-00008-of-00088.safetensors +3 -0
  11. model-00009-of-00088.safetensors +3 -0
  12. model-00010-of-00088.safetensors +3 -0
  13. model-00011-of-00088.safetensors +3 -0
  14. model-00012-of-00088.safetensors +3 -0
  15. model-00013-of-00088.safetensors +3 -0
  16. model-00014-of-00088.safetensors +3 -0
  17. model-00015-of-00088.safetensors +3 -0
  18. model-00016-of-00088.safetensors +3 -0
  19. model-00017-of-00088.safetensors +3 -0
  20. model-00018-of-00088.safetensors +3 -0
  21. model-00019-of-00088.safetensors +3 -0
  22. model-00020-of-00088.safetensors +3 -0
  23. model-00021-of-00088.safetensors +3 -0
  24. model-00022-of-00088.safetensors +3 -0
  25. model-00023-of-00088.safetensors +3 -0
  26. model-00024-of-00088.safetensors +3 -0
  27. model-00025-of-00088.safetensors +3 -0
  28. model-00026-of-00088.safetensors +3 -0
  29. model-00027-of-00088.safetensors +3 -0
  30. model-00028-of-00088.safetensors +3 -0
  31. model-00029-of-00088.safetensors +3 -0
  32. model-00030-of-00088.safetensors +3 -0
  33. model-00031-of-00088.safetensors +3 -0
  34. model-00032-of-00088.safetensors +3 -0
  35. model-00033-of-00088.safetensors +3 -0
  36. model-00034-of-00088.safetensors +3 -0
  37. model-00035-of-00088.safetensors +3 -0
  38. model-00036-of-00088.safetensors +3 -0
  39. model-00037-of-00088.safetensors +3 -0
  40. model-00038-of-00088.safetensors +3 -0
  41. model-00039-of-00088.safetensors +3 -0
  42. model-00040-of-00088.safetensors +3 -0
  43. model-00041-of-00088.safetensors +3 -0
  44. model-00042-of-00088.safetensors +3 -0
  45. model-00043-of-00088.safetensors +3 -0
  46. model-00044-of-00088.safetensors +3 -0
  47. model-00045-of-00088.safetensors +3 -0
  48. model-00046-of-00088.safetensors +3 -0
  49. model-00047-of-00088.safetensors +3 -0
  50. model-00048-of-00088.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "bos_token_id": 0,
13
+ "eos_token_id": 1,
14
+ "ep_size": 1,
15
+ "first_k_dense_replace": 3,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 7168,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 18432,
20
+ "kv_lora_rank": 512,
21
+ "max_position_embeddings": 163840,
22
+ "model_type": "deepseek_v3",
23
+ "moe_intermediate_size": 2048,
24
+ "moe_layer_freq": 1,
25
+ "n_group": 8,
26
+ "n_routed_experts": 256,
27
+ "n_shared_experts": 1,
28
+ "norm_topk_prob": true,
29
+ "num_attention_heads": 128,
30
+ "num_experts_per_tok": 8,
31
+ "num_hidden_layers": 61,
32
+ "num_key_value_heads": 128,
33
+ "num_nextn_predict_layers": 1,
34
+ "q_lora_rank": 1536,
35
+ "qk_nope_head_dim": 128,
36
+ "qk_rope_head_dim": 64,
37
+ "quantization": {
38
+ "group_size": 64,
39
+ "bits": 4
40
+ },
41
+ "quantization_config": {
42
+ "group_size": 64,
43
+ "bits": 4
44
+ },
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": {
47
+ "beta_fast": 32,
48
+ "beta_slow": 1,
49
+ "factor": 40,
50
+ "mscale": 1.0,
51
+ "mscale_all_dim": 1.0,
52
+ "original_max_position_embeddings": 4096,
53
+ "type": "yarn"
54
+ },
55
+ "rope_theta": 10000,
56
+ "routed_scaling_factor": 2.5,
57
+ "scoring_func": "sigmoid",
58
+ "tie_word_embeddings": false,
59
+ "topk_group": 4,
60
+ "topk_method": "noaux_tc",
61
+ "torch_dtype": "bfloat16",
62
+ "transformers_version": "4.46.3",
63
+ "use_cache": true,
64
+ "v_head_dim": 128,
65
+ "vocab_size": 129280
66
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "do_sample": true,
6
+ "temperature": 0.6,
7
+ "top_p": 0.95,
8
+ "transformers_version": "4.46.3"
9
+ }
model-00001-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0afed7f7666fe2d5d2f4564092a0e2ba05065a31b64c0de17bdf6b7fd1d32fc3
3
+ size 3725148341
model-00002-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e9b8b17df81fec22da698406914785ada0422c98f75ac104e9e18271c07add
3
+ size 4361585916
model-00003-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a879e181ab2dd26f3e218fa57c82aa3dfa74140060ad243057b81cb10ad89c57
3
+ size 4227859222
model-00004-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e10ba617d08556b3b784af898006458689da4e255063ea13d5682cb2318d807
3
+ size 4361585988
model-00005-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c806f2611dab94dc46aa990c02f0bc29e2203d978a92207a302c0ad4177a59fa
3
+ size 4361585926
model-00006-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8302b217eeda0a9ab261090fa419499efa37034a703885264a0324ff2bc275a0
3
+ size 4227859218
model-00007-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d01d227d63ba898a1dbbc5d851287fd2ce2b72f633c074e5ba7c376b8cd592
3
+ size 4361585986
model-00008-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87cca8ec32c6ccb6d47d6c47132a9c58772069678a542e7c58da3613c4b7a40a
3
+ size 4361585894
model-00009-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c043a1935d4c9eb2d0c98836a0a357cee318d8c5b86d4c55dbd25c0e44cfe9
3
+ size 4227859222
model-00010-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93c3c0004110f517cba02e72f4f512e74a36b354a44fb31dbeba3d9c979dfaa8
3
+ size 4361585912
model-00011-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c1f05a521960592eed5946e8dafdbdc02de05a501d92755e6a6103a13677e4
3
+ size 4361585915
model-00012-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a32ab0bb152675f4921406fa276035bbf177c2d2f7afb8ead846b22dcab5693f
3
+ size 4227859228
model-00013-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e2c8bb062c14edd3c342303343d68a179c88b46189026529fe2ce0a51d5ffb5
3
+ size 4361585998
model-00014-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd6b0561078b523bfb3a0ca15b353136893c8a0d8c9a32fc8f9395171184e211
3
+ size 4361585928
model-00015-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea3c520736b1289013b93969462fbebbc6bc78cbab4c5f904629de269e0bd7c0
3
+ size 4227859228
model-00016-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e203d4f7c70be8fdf960a890640988cea175a0db78474d2d5dfa8dabd654d240
3
+ size 4361585992
model-00017-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21e9c3ed1da1f5a2c417c2096025894ff9e0340fc409cb57683ea278843943b9
3
+ size 4361585926
model-00018-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac32b976e617556bed7be51eaeb50eec666984f358edb1e20591a60ebf6566bf
3
+ size 4227859224
model-00019-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656666829b795ef4ba3ce2052070debf7527bf7c5b250cd48505c968a3a12966
3
+ size 4361586000
model-00020-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a25d7f1402caf3ad8b36a5e4cbc14af32dc99eb18070068712097dc29ed0dae
3
+ size 4361585960
model-00021-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40cf196ac749fa3d15ea6fc22946556628c2cc042f1b1fe6ee7a95516f5f8229
3
+ size 4227859228
model-00022-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b935218b05d0c381d33c67ab9a7d1392c937cf59cc96049579714feb2d2138
3
+ size 4361585978
model-00023-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd1008c2f13a4f56360d09f49d3365d2408872f9ce4d1c881887ddf4cd47eb0
3
+ size 4361585936
model-00024-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be08a7df3313eacad9b8bef61d9f5334d3ebf75fd2df88d7303613d5fcf4b46e
3
+ size 4227859230
model-00025-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b4069560cc0874e1c8fa55cfe6a9510e97ce300e5780f2be5d1f12843110b81
3
+ size 4361586022
model-00026-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4702ac4764b17a503ea19c98114ced5880c0f8cc4a6848908ee814c85aa8d23
3
+ size 4361585940
model-00027-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6762a8cb043441339f1a7cda8c7f6cb6ec923d3f2fc535f8741cba09ba546f
3
+ size 4227859224
model-00028-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a43ab57cc8c58232edf0314b6148afb7a48c0bad2965e2334ff1bf1057284048
3
+ size 4361585990
model-00029-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4680ae4b93a06ff327ee6ebeca7a83b86d6d564a49bdd243ba54bcb8201ddbb3
3
+ size 4361585908
model-00030-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b6d86b5705663025789512d39e136f9efaa8405ac578704226752a78adf7075
3
+ size 4227859226
model-00031-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476b3158a5eec2868dc45b196eb0057586e436816491bc99637db5ceafb5a8be
3
+ size 4361586024
model-00032-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71daf82179aa808d1a324cd0678f1130593bc6aa142d9e5e6fee4341a113578
3
+ size 4361585970
model-00033-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441d8a2ca1f91d5b3c36f5836fc5fb3d46b8822aa48ec0e45b4a2fc85ad70580
3
+ size 4227859222
model-00034-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9121cb9fc83ac9e18a4dc619d52ffa538675dee86a67e6aeb5d150a8f91ad8e
3
+ size 4361585978
model-00035-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e00036dd4a2411b08b3f3bc130692906f4b51d4d22e11d198675710ce6b3e35
3
+ size 4361585948
model-00036-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f62d4a8b04a901e5782f6595db0fe076fdabe880c897f9182e0ed91b94810e73
3
+ size 4227859228
model-00037-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b22fe15ec7524148d95853024dbf282508844925e4302de460f72a9cf3d349a9
3
+ size 4361586024
model-00038-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b8b3cbf7f490aa995a3412c7fb6503f0b0db19231a8adf8f41738bfb83ae7a
3
+ size 4361585936
model-00039-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a82c5efe7fb5efce9e72dd46932b200d1b41f1ad6d665ff05a3d2909556b14
3
+ size 4227859224
model-00040-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d67515eacb35122785575e5541b29a71b0381e8ab7a9f74e8a5e456531c9c2
3
+ size 4361585964
model-00041-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87634c005b404ad8fe77945b2fa34681f1a962aab909d83e9070f15d7fc86cd2
3
+ size 4361585926
model-00042-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa120c5ab73b19456f207ecc4e4b05828c62db86ba9501321fa204639bcb927
3
+ size 4227859228
model-00043-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c226931ee25393d91478710dafbf61a215d9d2cf362fbe91ef082bc90a74f2f5
3
+ size 4361586022
model-00044-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f6b555ec74b423fbd83d495db07a96f8dd76e3cedb9228aabfc866a8597556
3
+ size 4361585962
model-00045-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ea381862c36d0804c9d4b536738ff26307e91fb5ebdbcf93f3d7e3e031c61ee
3
+ size 4227859228
model-00046-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de1e7e8cc531bf9a9a323c8e7e82ae3b91f1134f4ff9c369d33bbd4bc4cfe790
3
+ size 4361586022
model-00047-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:860cfd0c97be4a07239009a169231ec08f05d086b3eae3e5b828ec0e142588e9
3
+ size 4361585922
model-00048-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b06813689ac5c15f582cd3e79d225b54b97f73c089e937c4d23e6f38bccb0f95
3
+ size 4227859228