File size: 788 Bytes
452b173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#ifndef _q4_matrix_cuh
#define _q4_matrix_cuh

#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <cstdint>

// Describes one quantized matrix: its dimensions, grouping parameters and the
// buffers holding its quantized weights, zero points, scales and x-map.
// All member functions are defined outside this header.
//
// NOTE(review): the "Q4" / "cuda_" naming suggests 4-bit packed data living in
// device memory on `device` -- confirm against the implementation file.
//
// Copying is disabled. The class declares a destructor and stores raw
// pointers, so an implicitly generated member-wise copy would leave two
// objects holding the same pointers (and, if the destructor releases any of
// them, cause a double free). Instances are meant to be passed around by
// pointer, as g_q4_keep_matrix(Q4Matrix*) does.
class Q4Matrix
{
public:

    // NOTE(review): presumably the CUDA device ordinal given to the constructor.
    int device;

    // Matrix dimensions and grouping parameters. They have no default
    // initializer here, so they are only meaningful once the constructor has
    // assigned them.
    int height;
    int width;
    int groups;
    int groupsize;

    // Buffer pointers; null until assigned.
    uint32_t* cuda_qweight = nullptr;
    uint32_t* cuda_qzeros = nullptr;
    half* cuda_scales = nullptr;
    uint32_t* cuda_x_map = nullptr;

    // Builds a matrix description from its dimensions, its buffers and a
    // device index.
    //
    // NOTE(review): _g_idx is presumably a host-side group index array (the
    // private helper takes a `cpu_g_idx`); whether it may be null is decided
    // by the out-of-line definition -- confirm there.
    Q4Matrix
    (
        const int _height,
        const int _width,
        const int _groups,

        uint32_t* _qweight,
        uint32_t* _qzeros,
        half* _scales,
        uint32_t* _g_idx,

        const int _device
    );

    ~Q4Matrix();

    // Non-copyable: see the class comment for the rationale.
    Q4Matrix(const Q4Matrix&) = delete;
    Q4Matrix& operator=(const Q4Matrix&) = delete;

    // Writes the reconstructed matrix into `out`.
    // NOTE(review): presumably `out` must hold height * width half values in
    // device memory -- confirm against the definition.
    void reconstruct(half* out);

private:

    // Helper operating on a group index array.
    // NOTE(review): the name suggests it reorders data into sequential group
    // order and fills cuda_x_map -- confirm against the definition.
    void make_sequential(const uint32_t* cpu_g_idx);

};

// Hands a Q4Matrix pointer to module-level bookkeeping defined elsewhere.
// NOTE(review): presumably the matrix is stored in a global list and later
// released by g_q4_free_matrices(), i.e. ownership transfers here -- confirm
// against the definition.
void g_q4_keep_matrix(Q4Matrix* m);
// Counterpart to g_q4_keep_matrix(); takes no arguments and returns nothing.
// NOTE(review): presumably destroys every matrix previously passed to
// g_q4_keep_matrix(), invalidating those pointers -- confirm against the
// definition.
void g_q4_free_matrices();

#endif