Upload folder using huggingface_hub
- .gitattributes +15 -0
- torch25-cxx11-cu118-x86_64-linux/ggml/__init__.py +44 -0
- torch25-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch25-cxx11-cu118-x86_64-linux/ggml/_ops.py +9 -0
- torch25-cxx11-cu121-x86_64-linux/ggml/__init__.py +44 -0
- torch25-cxx11-cu121-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch25-cxx11-cu121-x86_64-linux/ggml/_ops.py +9 -0
- torch25-cxx11-cu124-x86_64-linux/ggml/__init__.py +44 -0
- torch25-cxx11-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch25-cxx11-cu124-x86_64-linux/ggml/_ops.py +9 -0
- torch25-cxx98-cu118-x86_64-linux/ggml/__init__.py +44 -0
- torch25-cxx98-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch25-cxx98-cu118-x86_64-linux/ggml/_ops.py +9 -0
- torch25-cxx98-cu121-x86_64-linux/ggml/__init__.py +44 -0
- torch25-cxx98-cu121-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch25-cxx98-cu121-x86_64-linux/ggml/_ops.py +9 -0
- torch25-cxx98-cu124-x86_64-linux/ggml/__init__.py +44 -0
- torch25-cxx98-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch25-cxx98-cu124-x86_64-linux/ggml/_ops.py +9 -0
- torch26-cxx11-cu118-x86_64-linux/ggml/__init__.py +44 -0
- torch26-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch26-cxx11-cu118-x86_64-linux/ggml/_ops.py +9 -0
- torch26-cxx11-cu124-x86_64-linux/ggml/__init__.py +44 -0
- torch26-cxx11-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch26-cxx11-cu124-x86_64-linux/ggml/_ops.py +9 -0
- torch26-cxx11-cu126-x86_64-linux/ggml/__init__.py +44 -0
- torch26-cxx11-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch26-cxx11-cu126-x86_64-linux/ggml/_ops.py +9 -0
- torch26-cxx98-cu118-x86_64-linux/ggml/__init__.py +44 -0
- torch26-cxx98-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch26-cxx98-cu118-x86_64-linux/ggml/_ops.py +9 -0
- torch26-cxx98-cu124-x86_64-linux/ggml/__init__.py +44 -0
- torch26-cxx98-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch26-cxx98-cu124-x86_64-linux/ggml/_ops.py +9 -0
- torch26-cxx98-cu126-x86_64-linux/ggml/__init__.py +44 -0
- torch26-cxx98-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch26-cxx98-cu126-x86_64-linux/ggml/_ops.py +9 -0
- torch27-cxx11-cu118-x86_64-linux/ggml/__init__.py +44 -0
- torch27-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch27-cxx11-cu118-x86_64-linux/ggml/_ops.py +9 -0
- torch27-cxx11-cu126-x86_64-linux/ggml/__init__.py +44 -0
- torch27-cxx11-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch27-cxx11-cu126-x86_64-linux/ggml/_ops.py +9 -0
- torch27-cxx11-cu128-x86_64-linux/ggml/__init__.py +44 -0
- torch27-cxx11-cu128-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so +3 -0
- torch27-cxx11-cu128-x86_64-linux/ggml/_ops.py +9 -0
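
Each directory above ships the same ggml extension built for one variant, named torch<major><minor>-<C++ ABI>-cu<CUDA version>-x86_64-linux. A loader can derive the matching directory from the running interpreter; the sketch below shows one plausible resolution, given here only as an illustrative assumption (the real logic lives in the Hugging Face kernels library, not in this upload).

import torch

def build_variant() -> str:
    # e.g. "2.6.0+cu124" -> major "2", minor "6"
    major, minor = torch.__version__.split("+")[0].split(".")[:2]
    cxx = "cxx11" if torch.compiled_with_cxx11_abi() else "cxx98"
    cuda = torch.version.cuda  # e.g. "12.4"; None on CPU-only builds
    assert cuda is not None, "these wheels are CUDA-only"
    return f"torch{major}{minor}-{cxx}-cu{cuda.replace('.', '')}-x86_64-linux"

print(build_variant())  # e.g. torch26-cxx11-cu124-x86_64-linux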
.gitattributes
CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+torch25-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch25-cxx11-cu121-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch25-cxx11-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch25-cxx98-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch25-cxx98-cu121-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch25-cxx98-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch26-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch26-cxx11-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch26-cxx11-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch26-cxx98-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch26-cxx98-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch26-cxx98-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch27-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch27-cxx11-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+torch27-cxx11-cu128-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
torch25-cxx11-cu118-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
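
For reference, a minimal usage sketch of the three ops defined above. Only the signatures come from the file itself; the shapes, dtypes, and quant_type value are illustrative assumptions (ggml's quantization-type enum is not spelled out in this upload).

import torch
import ggml

W = torch.zeros(4096, 4352, dtype=torch.uint8, device="cuda")  # assumed packed layout
X = torch.randn(1, 4096, dtype=torch.float16, device="cuda")
QTYPE = 8  # assumption: ggml's Q8_0 enum value

W_full = ggml.ggml_dequantize(W, QTYPE, 4096, 4096)  # expand to full precision
y = ggml.ggml_mul_mat_vec_a8(W, X, QTYPE, 4096)      # MMVQ path, batch must be 1
Y = ggml.ggml_mul_mat_a8(W, X, QTYPE, 4096)          # MMQ path, any batch size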
torch25-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1269b8fde5d2a9c7a2e606dc321daf91dc63727482c1c5a71075f4775442a192
+size 10152640
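
The three lines above are a Git LFS pointer, not the binary itself: oid is the SHA-256 of the stored blob and size is its byte length. A downloaded artifact can be checked against the pointer:

import hashlib
from pathlib import Path

blob = Path("torch25-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so").read_bytes()
assert len(blob) == 10152640
assert hashlib.sha256(blob).hexdigest() == "1269b8fde5d2a9c7a2e606dc321daf91dc63727482c1c5a71075f4775442a192"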
torch25-cxx11-cu118-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
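
add_op_namespace_prefix qualifies an op name with this build's private namespace (_ggml_4fb4e1d_dirty), the form torch.library APIs expect. One plausible use, sketched under the assumption of torch >= 2.4 and not shown in this upload, is registering a fake (meta) kernel so the op can be traced without running CUDA code:

import torch
from ggml._ops import add_op_namespace_prefix

@torch.library.register_fake(add_op_namespace_prefix("ggml_dequantize"))
def _(W, quant_type, m, n):
    # Assumed output dtype; the real kernel's choice is not visible in this diff.
    return W.new_empty(m, n, dtype=torch.float16)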
torch25-cxx11-cu121-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch25-cxx11-cu121-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b95f924ee9a29f5769ef65688f832b09475085a609ee77fb9b31babe71465bca
+size 10171384
torch25-cxx11-cu121-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch25-cxx11-cu124-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch25-cxx11-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d165bb84617b2640188838170d9c4f2820b34e464c48aa2e16931b94603f75ec
+size 9874216
torch25-cxx11-cu124-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch25-cxx98-cu118-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch25-cxx98-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15447a768b451098c7104e0aa77fea0a8365e67eb6fae04a65f38a1aa90fe56e
+size 10144848
torch25-cxx98-cu118-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch25-cxx98-cu121-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch25-cxx98-cu121-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4bf44b77425f1c011592bd6379e22041c38f35ac6928668f71d2056f6e4bf89
+size 10163720
torch25-cxx98-cu121-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch25-cxx98-cu124-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch25-cxx98-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:295ca6540248aaff99492253ab7bdb14161a91edd38db0e78bd2db82eff9742c
+size 9866632
torch25-cxx98-cu124-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch26-cxx11-cu118-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch26-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c58cba78fe2839b6316e50791277312c526e1eb110372d5caa21125464129714
+size 10152816
torch26-cxx11-cu118-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch26-cxx11-cu124-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch26-cxx11-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7aac3705f49e22523df2665416feb67011252a4dedefe17506b2a19bfc589d8
+size 9870296
torch26-cxx11-cu124-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch26-cxx11-cu126-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch26-cxx11-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8bf474a0da8e8cb466b48cb21599eef8c53975e984cef0028ca6c1d447496c2
+size 9829776
torch26-cxx11-cu126-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch26-cxx98-cu118-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch26-cxx98-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da018854b3ab695bd922e0f1ef44905f0f0623a52ddfaee687af95daaad4b859
+size 10145072
torch26-cxx98-cu118-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch26-cxx98-cu124-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch26-cxx98-cu124-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d5028c948c26d5ac3a62367c325e99a7a1b3030db2793c5143a5bc8938410bb
+size 9866856
torch26-cxx98-cu124-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch26-cxx98-cu126-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch26-cxx98-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:286fdf8b4f36ed0394435774d1e506c3017922372e2346a9af66e92241eba5ac
+size 9818144
torch26-cxx98-cu126-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch27-cxx11-cu118-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch27-cxx11-cu118-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a923e0437ff129320135f3ddd53504f5a79df4595b4667b0016be3d746da15a
+size 10152944
torch27-cxx11-cu118-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch27-cxx11-cu126-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch27-cxx11-cu126-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8616e58d50c639d6fa04f78f1ccba049ac4c9bf3bb6d77cf80a469e023c5f362
+size 9825712
torch27-cxx11-cu126-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"
torch27-cxx11-cu128-x86_64-linux/ggml/__init__.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+
+try:
+    from ._ops import ops
+except ImportError as e:
+    # Fallback for local development.
+    try:
+        import _ggml
+
+        ops = torch.ops._ggml
+    except ImportError:
+        raise e
+
+
+def ggml_dequantize(
+    W: torch.Tensor,
+    quant_type: int,
+    m: int,
+    n: int,
+) -> torch.Tensor:
+    """Dequantize the GGML tensor."""
+    return ops.ggml_dequantize(W, quant_type, m, n)
+
+
+def ggml_mul_mat_vec_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul with the MMVQ kernel; requires batch_size == 1."""
+    batch = X.size(0)
+    assert batch == 1, "Batch size must be 1 for MMVQ kernel"
+    return ops.ggml_mul_mat_vec_a8(W, X, quant_type, row)
+
+
+def ggml_mul_mat_a8(
+    W: torch.Tensor,
+    X: torch.Tensor,
+    quant_type: int,
+    row: int,
+) -> torch.Tensor:
+    """Matmul through the MMQ kernel for arbitrary batch sizes."""
+    return ops.ggml_mul_mat_a8(W, X, quant_type, row)
torch27-cxx11-cu128-x86_64-linux/ggml/_ggml_4fb4e1d_dirty.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30e77d21fba079480fc470092167da87ac23a84fae8f45a5b0a3ca8b5f48d0e7
+size 9880456
torch27-cxx11-cu128-x86_64-linux/ggml/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _ggml_4fb4e1d_dirty
+ops = torch.ops._ggml_4fb4e1d_dirty
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix the op name with this build's private namespace.
+    """
+    return f"_ggml_4fb4e1d_dirty::{op_name}"