Spaces:
Sleeping
Sleeping
Commit
·
30b652f
0
Parent(s):
Initial commit of FlexChunk Hugging Face app
Browse files- .gitattributes +1 -0
- README.md +51 -0
- app.py +221 -0
- flex_chunk.py +214 -0
- matrix_multiply.py +198 -0
- requirements.txt +3 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: FlexChunk SpMV Demo
|
3 |
+
emoji: ⚡
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.39.0 # Or check your gradio version
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
# FlexChunk: Enabling 100M×100M Out-of-Core SpMV
|
13 |
+
|
14 |
+
This Hugging Face Space demonstrates **FlexChunk**, an algorithm for performing Sparse Matrix-Vector Multiplication (SpMV) on matrices potentially too large to fit in RAM.
|
15 |
+
|
16 |
+
**Key Idea:** Divide the matrix into manageable horizontal chunks, process them sequentially, and use minimal memory (~1.7 GB for 100M×100M SpMV).
|
17 |
+
|
18 |
+
## Interactive Demo
|
19 |
+
|
20 |
+
The app above allows you to:
|
21 |
+
1. Generate a sparse matrix (adjust **Size**, **Density**, **Chunks**, **Challenging** flag).
|
22 |
+
2. Run SpMV using **FlexChunk** (chunking to disk) and **SciPy** (emulating disk load).
|
23 |
+
3. Compare performance (timings) and correctness.
|
24 |
+
|
25 |
+
**Note:** Larger matrices/densities will take longer. Limits are set for public infrastructure.
|
26 |
+
|
27 |
+
## Performance Highlights
|
28 |
+
|
29 |
+
FlexChunk demonstrates near-linear scaling in time and memory for increasing matrix dimensions.
|
30 |
+
|
31 |
+
**Time Performance and Memory Usage:** See the original article for detailed performance graphs and analysis.
|
32 |
+
|
33 |
+
### Benchmark Results
|
34 |
+
|
35 |
+
Selected results showing performance on large matrices:
|
36 |
+
|
37 |
+
| Matrix Size | Non-zero Elements | Total Time | Peak RAM Usage |
|
38 |
+
|--------------------|-------------------|-----------------|----------------|
|
39 |
+
| ... | ... | ... | ... |
|
40 |
+
| 1.0M × 1.0M | 1.2M | 1.07 s | 17.00 MB |
|
41 |
+
| 10.0M × 10.0M | 12.0M | 10.21 s | 170.00 MB |
|
42 |
+
| 30.0M × 30.0M | 36.0M | 31.13 s | 510.00 MB |
|
43 |
+
| 50.0M × 50.0M | 62.5M | 55.27 s | 850.00 MB |
|
44 |
+
| 70.0M × 70.0M | 88.2M | 1 min 17.1 s | 1.19 GB |
|
45 |
+
| **100.0M × 100.0M**| **120.0M** | **1 min 47.1 s**| **1.70 GB** |
|
46 |
+
|
47 |
+
*(The full benchmark table is in the README of the original FlexChunk repository; see the article linked under "More Information" below.)*
|
48 |
+
|
49 |
+
## More Information
|
50 |
+
|
51 |
+
See the original post for technical details: [FlexChunk: Enabling 100M×100M Out-of-Core SpMV](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb)
|
app.py
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import scipy.sparse as sparse
|
4 |
+
import time
|
5 |
+
import os
|
6 |
+
import shutil
|
7 |
+
import math
|
8 |
+
import sys
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
# Assuming flex_chunk.py and matrix_multiply.py are in the same directory
|
12 |
+
from flex_chunk import FlexChunk, save_chunk, load_chunk
|
13 |
+
from matrix_multiply import prepare_chunks, load_chunks, matrix_vector_multiply
|
14 |
+
|
15 |
+
# --- Matrix Generation (copied from test_vs_scipy.py) ---
|
16 |
+
|
17 |
+
def generate_sparse_matrix(size, density, challenging=False):
    """
    Generate a random square sparse test matrix in CSR format.

    Args:
        size: Matrix dimension (the result is size x size). Coerced to int.
        density: Target fraction of non-zero entries. At least one entry is
            always generated, even if size * size * density rounds to zero.
        challenging: If True, combine ~80% uniform random entries with random
            diagonal entries and a share of very large / very small values.

    Returns:
        A scipy.sparse.csr_matrix of shape (size, size). Coordinates are drawn
        with replacement and duplicates are summed, so the number of stored
        entries can be lower than the target.

    Raises:
        ValueError: If size is not positive.
    """
    size = int(size)
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size}")

    # Target number of non-zeros; never less than one.
    nnz = max(1, int(size * size * density))

    if not challenging:
        # Simple uniformly random matrix.
        rows = np.random.randint(0, size, nnz)
        cols = np.random.randint(0, size, nnz)
        data = np.random.rand(nnz)

        if nnz < size:
            # Very sparse case: additionally put ones on the first nnz
            # diagonal positions.
            diag_indices = np.arange(nnz)
            rows = np.concatenate([rows, diag_indices])
            cols = np.concatenate([cols, diag_indices])
            data = np.concatenate([data, np.ones(len(diag_indices))])

        matrix = sparse.csr_matrix((data, (rows, cols)), shape=(size, size))
        matrix.sum_duplicates()  # Consolidate duplicate entries
        return matrix

    # --- Challenging matrix with specific patterns ---
    # Base random entries (80% of the target).
    base_nnz = int(nnz * 0.8)
    rows = np.random.randint(0, size, base_nnz)
    cols = np.random.randint(0, size, base_nnz)
    data = np.random.rand(base_nnz)

    # Diagonal entries (10% of the target). Sampling without replacement
    # cannot draw more indices than there are rows, so the share is capped at
    # `size`; whatever is cut off here is added to the extreme-value share.
    diag_nnz = min(int(nnz * 0.1), size)
    diag_indices = np.random.choice(size, diag_nnz, replace=False)

    # Extreme values take the remainder of the target.
    extreme_nnz = max(0, nnz - base_nnz - diag_nnz)
    extreme_rows = np.random.randint(0, size, extreme_nnz)
    extreme_cols = np.random.randint(0, size, extreme_nnz)

    if extreme_nnz > 0:
        # Half very large, half very small magnitudes, in random order.
        n_large = extreme_nnz // 2
        extreme_data = np.concatenate([
            np.random.uniform(1e6, 1e9, n_large),
            np.random.uniform(1e-9, 1e-6, extreme_nnz - n_large),
        ])
        np.random.shuffle(extreme_data)
    else:
        extreme_data = np.array([])

    # Combine all components.
    all_rows = np.concatenate([rows, diag_indices, extreme_rows])
    all_cols = np.concatenate([cols, diag_indices, extreme_cols])
    all_data = np.concatenate([data, np.random.rand(diag_nnz), extreme_data])

    matrix = sparse.csr_matrix((all_data, (all_rows, all_cols)), shape=(size, size))
    matrix.sum_duplicates()  # Consolidate duplicate entries
    return matrix
|
83 |
+
|
84 |
+
# --- Benchmark Function ---
|
85 |
+
|
86 |
+
def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()):
    """
    Benchmark FlexChunk SpMV against SciPy on a freshly generated matrix.

    Args:
        size: Matrix dimension N (the matrix is N x N). Coerced to int.
        density: Target fraction of non-zero entries.
        num_chunks: Number of row chunks to split the matrix into. Coerced to int.
        challenging: Forwarded to generate_sparse_matrix.
        progress: Gradio progress tracker; called with a fraction and a description.

    Returns:
        A Markdown-formatted string with the matrix statistics, the timings of
        both approaches and the comparison of their results.
    """
    import tempfile  # local import: only this function needs it

    # UI sliders may deliver floats; array sizes and chunk counts must be ints.
    size = int(size)
    num_chunks = int(num_chunks)

    progress(0, desc="Starting Benchmark...")

    # 1. Setup storage: a private scratch directory per run, so overlapping
    #    runs cannot delete each other's files.
    storage_dir = Path(tempfile.mkdtemp(prefix="flex_chunk_temp_space_"))
    try:
        progress(0.1, desc="Generating Matrix...")
        # 2. Generate matrix and vector
        matrix = generate_sparse_matrix(size, density, challenging)
        vector = np.random.rand(size)
        actual_nnz = matrix.nnz
        actual_density = actual_nnz / (size * size) if size > 0 else 0

        matrix_info = f"Matrix: {size}x{size}, Target Density: {density:.6f}, Actual Density: {actual_density:.6f}, NNZ: {actual_nnz}"
        print(matrix_info)  # For debugging in Hugging Face console

        # --- FlexChunk Run ---
        progress(0.2, desc="Preparing FlexChunks...")
        prepare_start = time.time()
        prepare_chunks(matrix, num_chunks, str(storage_dir), verbose=False)
        prepare_time = time.time() - prepare_start

        progress(0.4, desc="Loading FlexChunks...")
        load_start = time.time()
        chunks = load_chunks(str(storage_dir), verbose=False)
        load_time = time.time() - load_start

        progress(0.6, desc="Running FlexChunk SpMV...")
        flex_compute_start = time.time()
        flex_result = matrix_vector_multiply(chunks, vector, verbose=False)
        flex_compute_time = time.time() - flex_compute_start
        flex_total_time = load_time + flex_compute_time

        # --- SciPy Run ---
        progress(0.7, desc="Saving SciPy data...")
        scipy_temp_dir = storage_dir / "scipy_temp"
        scipy_temp_dir.mkdir(exist_ok=True)
        matrix_file = scipy_temp_dir / "matrix.npz"
        vector_file = scipy_temp_dir / "vector.npy"

        scipy_save_start = time.time()
        sparse.save_npz(matrix_file, matrix)
        np.save(vector_file, vector)
        scipy_save_time = time.time() - scipy_save_start

        progress(0.8, desc="Loading SciPy data...")
        scipy_load_start = time.time()
        loaded_matrix = sparse.load_npz(matrix_file)
        loaded_vector = np.load(vector_file)
        scipy_load_time = time.time() - scipy_load_start

        progress(0.9, desc="Running SciPy SpMV...")
        scipy_compute_start = time.time()
        scipy_result = loaded_matrix @ loaded_vector
        scipy_compute_time = time.time() - scipy_compute_start
        # The save time is reported separately and is not part of the total.
        scipy_total_time = scipy_load_time + scipy_compute_time

        # --- Comparison ---
        progress(0.95, desc="Comparing results...")
        diff = np.abs(scipy_result - flex_result)
        max_diff = np.max(diff) if len(diff) > 0 else 0
        mean_diff = np.mean(diff) if len(diff) > 0 else 0
        is_close = np.allclose(scipy_result, flex_result, atol=1e-9)
        comparison_result = f"✅ Results Match! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" if is_close else f"❌ Results Differ! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})"
    finally:
        # --- Cleanup --- (also runs when a step above fails)
        shutil.rmtree(storage_dir, ignore_errors=True)

    progress(1.0, desc="Benchmark Complete")

    # --- Format Output ---
    results_summary = f"""
{matrix_info}

**FlexChunk Performance:**
- Prepare Chunks Time: {prepare_time:.4f}s
- Load Chunks Time: {load_time:.4f}s
- Compute Time: {flex_compute_time:.4f}s
- **Total (Load+Compute): {flex_total_time:.4f}s**

**SciPy Performance (Out-of-Core Emulation):**
- Save Data Time: {scipy_save_time:.4f}s (For reference)
- Load Data Time: {scipy_load_time:.4f}s
- Compute Time: {scipy_compute_time:.4f}s
- **Total (Load+Compute): {scipy_total_time:.4f}s**

**Comparison:**
{comparison_result}
"""

    return results_summary
|
183 |
+
|
184 |
+
# --- Gradio Interface ---
|
185 |
+
|
186 |
+
# Build the UI: benchmark parameters on the left, Markdown report on the right.
with gr.Blocks() as demo:
    gr.Markdown(
        "# FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication (SpMV) Demo\n"
        "\n"
        "This demo compares the performance of FlexChunk against standard SciPy for SpMV,\n"
        "simulating an out-of-core scenario where the matrix doesn't fit entirely in memory.\n"
        "\n"
        "FlexChunk splits the matrix into smaller chunks, processing them sequentially to reduce peak memory usage.\n"
        "SciPy's reported total includes the time to load the matrix from disk to mimic this out-of-core access; "
        "the save time is shown for reference only.\n"
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("**Benchmark Parameters**")
            size_input = gr.Slider(label="Matrix Size (N x N)", minimum=100, maximum=50000, value=10000, step=100)
            # Max density adjusted to prevent excessive nnz for large matrices in demo.
            # NOTE: `format` is not among gr.Slider's documented parameters, so it
            # is not passed here.
            density_input = gr.Slider(label="Matrix Density", minimum=0.00001, maximum=0.01, value=0.0001, step=0.00001)
            chunks_input = gr.Slider(label="Number of Chunks", minimum=1, maximum=32, value=4, step=1)
            challenging_input = gr.Checkbox(label="Use Challenging Matrix (Extreme Values)", value=False)
            run_button = gr.Button("Run Benchmark", variant="primary")

        with gr.Column(scale=2):
            gr.Markdown("**Results**")
            output_textbox = gr.Markdown(label="Benchmark Summary")

    # Wire the button to the benchmark; the returned Markdown string fills the report.
    run_button.click(
        fn=run_benchmark,
        inputs=[size_input, density_input, chunks_input, challenging_input],
        outputs=[output_textbox]
    )

    gr.Markdown("--- Developed based on the [FlexChunk concept](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb).")

# Launch the app
if __name__ == "__main__":
    demo.launch()
|
flex_chunk.py
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
FlexChunk - Minimal implementation of optimized data structure for sparse matrix chunks.
|
3 |
+
|
4 |
+
Ref: T4, T5, T13
|
5 |
+
"""
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import os
|
9 |
+
import struct
|
10 |
+
from typing import Tuple, Optional
|
11 |
+
import scipy.sparse as sparse
|
12 |
+
|
13 |
+
# Magic number for binary format identification
|
14 |
+
FLEX_CHUNK_MAGIC = b'FLXCHK01'
|
15 |
+
|
16 |
+
class FlexChunk:
    """
    A block of consecutive matrix rows stored as raw CSR arrays.

    Ref: T4, T10
    """
    def __init__(self,
                 start_row: int,
                 num_rows: int,
                 row_offsets: np.ndarray,
                 col_indices: np.ndarray,
                 data: np.ndarray,
                 shape: Optional[Tuple[int, int]] = None):
        """
        Initialize a FlexChunk from raw CSR data

        Ref: T4

        Args:
            start_row: Global starting row index
            num_rows: Number of rows in this chunk
            row_offsets: CSR row pointer array (length num_rows+1)
            col_indices: CSR column indices array
            data: CSR data values array
            shape: Optional (rows, cols) shape; only the column count is used.
                If not provided, the column count is inferred from the largest
                column index (0 when there are no entries).

        Raises:
            ValueError: If row_offsets has the wrong length or decreases anywhere.
        """
        self.start_row = start_row
        self.num_rows = num_rows
        # One past the last global row covered by this chunk.
        self.end_row = start_row + num_rows

        # Validate row_offsets
        if len(row_offsets) != num_rows + 1:
            raise ValueError(f"row_offsets must have length {num_rows + 1}, got {len(row_offsets)}")
        if not np.all(np.diff(row_offsets) >= 0):
            raise ValueError("row_offsets must be monotonically increasing")

        # [T4] Preserve structural representation
        self.row_offsets = row_offsets
        self.col_indices = col_indices
        self.data = data

        # Determine number of columns
        if shape is not None:
            self.n_cols = shape[1]
        elif len(col_indices) > 0:
            # If shape not provided, determine by max column index
            self.n_cols = col_indices.max() + 1
        else:
            self.n_cols = 0

        # Shape of this chunk: (rows in the chunk, number of columns)
        self.shape = (num_rows, self.n_cols)

        # Stats
        self.nnz = len(data)

    def process_with_vector(self, vector: np.ndarray) -> np.ndarray:
        """
        Multiply chunk with a vector

        Ref: T5, T13

        Args:
            vector: Vector to multiply with; its length must equal n_cols

        Returns:
            Array of length num_rows with the product for this chunk's rows.
            Its dtype is the promotion of the data and vector dtypes, so an
            integer vector does not truncate fractional products.

        Raises:
            ValueError: If the vector length does not match n_cols.
        """
        vector = np.asarray(vector)
        if len(vector) != self.n_cols:
            raise ValueError(f"Vector length {len(vector)} does not match matrix columns {self.n_cols}")

        out_dtype = np.result_type(self.data.dtype, vector.dtype)

        # [T5] Skip processing for empty data
        if self.nnz == 0:
            return np.zeros(self.num_rows, dtype=out_dtype)

        # Only the entries addressed by row_offsets take part in the product.
        first = int(self.row_offsets[0])
        last = int(self.row_offsets[-1])
        cols = self.col_indices[first:last]
        vals = self.data[first:last]

        # Local row index of every stored entry (row i repeated once per entry).
        row_ids = np.repeat(np.arange(self.num_rows), np.diff(self.row_offsets))

        # Entries whose column lies beyond the vector are ignored.
        in_range = cols < self.n_cols
        if not in_range.all():
            row_ids, cols, vals = row_ids[in_range], cols[in_range], vals[in_range]

        # [T13] One vectorized multiply, then a per-row sum of the products.
        products = vals * vector[cols]
        if np.iscomplexobj(products):
            # bincount only takes real weights; sum both parts separately.
            result = (np.bincount(row_ids, weights=products.real, minlength=self.num_rows)
                      + 1j * np.bincount(row_ids, weights=products.imag, minlength=self.num_rows))
        else:
            result = np.bincount(row_ids, weights=products, minlength=self.num_rows)

        return result.astype(out_dtype, copy=False)

    @classmethod
    def from_csr_matrix(cls,
                        matrix: sparse.csr_matrix,
                        start_row: int = 0,
                        end_row: Optional[int] = None) -> 'FlexChunk':
        """
        Create a FlexChunk from a CSR matrix (full or slice)

        Ref: T4, T9

        Args:
            matrix: A scipy.sparse.csr_matrix holding exactly the chunk's rows
                (other sparse formats are converted with tocsr())
            start_row: Global start row index
            end_row: Global end row index (optional); defaults to
                start_row + matrix.shape[0]

        Returns:
            A new FlexChunk holding copies of the matrix's CSR arrays

        Raises:
            ValueError: If end_row - start_row differs from the matrix row count.
        """
        if not sparse.isspmatrix_csr(matrix):
            matrix = matrix.tocsr()

        if end_row is None:
            end_row = start_row + matrix.shape[0]

        num_rows = end_row - start_row

        if num_rows != matrix.shape[0]:
            raise ValueError(f"Matrix shape {matrix.shape} doesn't match row range {start_row}:{end_row}")

        # [T4] Maintain data structure integrity: copy so the chunk does not
        # share buffers with the source matrix.
        return cls(
            start_row=start_row,
            num_rows=num_rows,
            row_offsets=matrix.indptr.copy(),
            col_indices=matrix.indices.copy(),
            data=matrix.data.copy(),
            shape=matrix.shape
        )
|
149 |
+
|
150 |
+
def save_chunk(chunk: FlexChunk, filepath: str) -> None:
    """
    Save a FlexChunk to a binary file.

    File layout: the magic bytes, four native 64-bit integers (start_row,
    num_rows, nnz, n_cols), then row_offsets and col_indices as int32 and the
    values as float64.

    Ref: T4

    Args:
        chunk: The FlexChunk to save
        filepath: Path to save the file

    Raises:
        ValueError: If an offset or column index does not fit into int32 and
            would otherwise be silently corrupted by the cast.
    """
    int32_max = np.iinfo(np.int32).max
    # row_offsets is non-decreasing, so its last element is its maximum.
    largest_offset = int(chunk.row_offsets[-1]) if len(chunk.row_offsets) > 0 else 0
    largest_col = int(np.max(chunk.col_indices)) if len(chunk.col_indices) > 0 else 0
    if largest_offset > int32_max or largest_col > int32_max:
        raise ValueError(
            f"Chunk is too large for the int32 on-disk format "
            f"(max offset {largest_offset}, max column index {largest_col})"
        )

    with open(filepath, 'wb') as f:
        # Write the magic number
        f.write(FLEX_CHUNK_MAGIC)

        # [T4] Store structural representation
        for header_value in (chunk.start_row, chunk.num_rows, chunk.nnz, chunk.n_cols):
            f.write(struct.pack('q', header_value))

        # Write arrays
        f.write(chunk.row_offsets.astype(np.int32).tobytes())
        f.write(chunk.col_indices.astype(np.int32).tobytes())
        f.write(chunk.data.astype(np.float64).tobytes())
|
174 |
+
|
175 |
+
def load_chunk(filepath: str) -> FlexChunk:
    """
    Load a FlexChunk from a binary file.

    Expects the magic bytes, four native 64-bit integers (start_row, num_rows,
    nnz, n_cols), then row_offsets and col_indices as int32 and the values as
    float64.

    Ref: T4

    Args:
        filepath: Path to the file

    Returns:
        Loaded FlexChunk. Its arrays are views over the bytes read from the
        file and are therefore read-only.

    Raises:
        ValueError: If the magic number is wrong, the header holds negative
            sizes, or the file ends before all announced data was read.
    """
    with open(filepath, 'rb') as f:
        def read_exact(num_bytes: int) -> bytes:
            # A short read means the file is truncated; fail loudly instead of
            # building a chunk from incomplete arrays.
            buf = f.read(num_bytes)
            if len(buf) != num_bytes:
                raise ValueError(f"Truncated or corrupt chunk file {filepath}")
            return buf

        # Verify the magic number
        magic = f.read(len(FLEX_CHUNK_MAGIC))
        if magic != FLEX_CHUNK_MAGIC:
            raise ValueError(f"Invalid file format for {filepath}")

        # [T4] Restore structural representation
        start_row = struct.unpack('q', read_exact(8))[0]
        num_rows = struct.unpack('q', read_exact(8))[0]
        nnz = struct.unpack('q', read_exact(8))[0]
        n_cols = struct.unpack('q', read_exact(8))[0]
        if min(start_row, num_rows, nnz, n_cols) < 0:
            raise ValueError(f"Truncated or corrupt chunk file {filepath}")

        # Read arrays
        row_offsets = np.frombuffer(read_exact((num_rows + 1) * 4), dtype=np.int32)
        col_indices = np.frombuffer(read_exact(nnz * 4), dtype=np.int32)
        data = np.frombuffer(read_exact(nnz * 8), dtype=np.float64)

    # Create the FlexChunk with explicit shape
    return FlexChunk(
        start_row=start_row,
        num_rows=num_rows,
        row_offsets=row_offsets,
        col_indices=col_indices,
        data=data,
        shape=(num_rows, n_cols)
    )
|
matrix_multiply.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Minimal implementation of matrix-vector multiplication using FlexChunk format.
|
3 |
+
Includes only direct (single-process) multiplication and chunking functions.
|
4 |
+
|
5 |
+
Ref: T9, T10, T13
|
6 |
+
"""
|
7 |
+
|
8 |
+
import os
|
9 |
+
import math
|
10 |
+
import time
|
11 |
+
import numpy as np
|
12 |
+
import scipy.sparse as sparse
|
13 |
+
from typing import List, Optional
|
14 |
+
|
15 |
+
from flex_chunk import FlexChunk, save_chunk, load_chunk
|
16 |
+
|
17 |
+
def prepare_chunks(matrix: sparse.csr_matrix,
                   num_chunks: int,
                   storage_dir: str,
                   verbose: bool = False) -> List[str]:
    """
    Split a sparse matrix into row chunks and save them under storage_dir.

    Chunks are written to <storage_dir>/chunks/chunk_<i>.bin and the matrix
    shape to <storage_dir>/matrix_info.npy. Chunk files left over from an
    earlier call are removed first, because load_chunks reads every
    chunk_*.bin it finds in that directory.

    Ref: T4, T9

    Args:
        matrix: Sparse matrix to split into chunks (converted to CSR if needed)
        num_chunks: Maximum number of chunks to create; fewer are written when
            the matrix has fewer rows. Coerced to int.
        storage_dir: Directory to store chunks
        verbose: Whether to print debug information

    Returns:
        List of paths to the created chunks

    Raises:
        ValueError: If num_chunks is smaller than 1.
    """
    num_chunks = int(num_chunks)
    if num_chunks < 1:
        raise ValueError(f"num_chunks must be at least 1, got {num_chunks}")

    if not sparse.isspmatrix_csr(matrix):
        matrix = matrix.tocsr()

    # Ensure the storage directory exists (makedirs creates storage_dir too)
    chunks_dir = os.path.join(storage_dir, "chunks")
    os.makedirs(chunks_dir, exist_ok=True)

    # Drop stale chunk files so a later load cannot mix two matrices.
    for stale_name in os.listdir(chunks_dir):
        if stale_name.startswith("chunk_") and stale_name.endswith(".bin"):
            os.remove(os.path.join(chunks_dir, stale_name))

    # [T9] Divide data into independent processing units
    total_rows = matrix.shape[0]
    rows_per_chunk = max(1, math.ceil(total_rows / num_chunks))

    # Create and save chunks; stepping by rows_per_chunk yields at most
    # num_chunks start rows.
    chunk_paths = []
    for i, start_row in enumerate(range(0, total_rows, rows_per_chunk)):
        end_row = min(start_row + rows_per_chunk, total_rows)

        # [T4] Preserve data structure in chunks
        chunk = FlexChunk.from_csr_matrix(
            matrix=matrix[start_row:end_row, :],
            start_row=start_row,
            end_row=end_row
        )

        # Save chunk to file
        chunk_path = os.path.join(chunks_dir, f"chunk_{i}.bin")
        save_chunk(chunk, chunk_path)
        chunk_paths.append(chunk_path)

        if verbose:
            print(f"Created chunk {i}: rows {start_row}-{end_row}, nnz: {chunk.nnz}, saved to {chunk_path}")

    # Also save matrix dimensions for later use
    info_path = os.path.join(storage_dir, "matrix_info.npy")
    np.save(info_path, np.array([matrix.shape[0], matrix.shape[1]], dtype=np.int64))

    if verbose:
        print(f"Matrix chunks prepared and saved to {storage_dir}")
        print(f"Total chunks: {len(chunk_paths)}")
        print(f"Matrix shape: {matrix.shape}")

    return chunk_paths
|
82 |
+
|
83 |
+
def load_chunks(storage_dir: str, verbose: bool = False) -> List[FlexChunk]:
    """
    Read every saved chunk found under <storage_dir>/chunks.

    Ref: T4, T13

    Args:
        storage_dir: Directory containing saved chunks
        verbose: Whether to print debug information

    Returns:
        The FlexChunk objects, ordered by the number in their file name

    Raises:
        ValueError: If the chunks directory is missing or holds no chunk files.
    """
    chunks_dir = os.path.join(storage_dir, "chunks")
    if not os.path.exists(chunks_dir):
        raise ValueError(f"Chunks directory {chunks_dir} does not exist")

    def chunk_number(filename):
        # "chunk_12.bin" -> 12, so files sort numerically rather than as text.
        return int(filename.split('_')[1].split('.')[0])

    # Collect the chunk files in numeric order
    names = [
        entry for entry in os.listdir(chunks_dir)
        if entry.startswith("chunk_") and entry.endswith(".bin")
    ]
    names.sort(key=chunk_number)

    if not names:
        raise ValueError(f"No chunk files found in {chunks_dir}")

    # [T4] Restore structural representation from storage
    loaded = [load_chunk(os.path.join(chunks_dir, name)) for name in names]

    if verbose:
        print(f"Loaded {len(loaded)} chunks from {storage_dir}")
        print(f"Matrix shape: ({loaded[-1].end_row}, {loaded[0].n_cols})")

    return loaded
|
118 |
+
|
119 |
+
def matrix_vector_multiply(chunks: List[FlexChunk],
                           vector: np.ndarray,
                           verbose: bool = False) -> np.ndarray:
    """
    Multiply a sparse matrix with a vector using direct mode and precomputed chunks.

    Each chunk is multiplied with the vector on its own and its result is
    written to rows [start_row, end_row) of the output.

    Ref: T5, T10, T13

    Args:
        chunks: List of FlexChunk objects representing the matrix
        vector: Vector to multiply with (converted with np.asarray)
        verbose: Whether to print debug information

    Returns:
        Result vector whose length is the largest end_row among the chunks.
        Its dtype is the promotion of the vector dtype and the chunks' data
        dtypes, so an integer vector does not truncate fractional results.

    Raises:
        ValueError: If chunks is empty or the vector length differs from the
            column count of the first chunk.
    """
    start_time = time.time()

    if verbose:
        print("Starting matrix-vector multiplication (direct mode)")

    # Convert vector to numpy array if needed
    vector = np.asarray(vector)

    # Validate chunks
    if not chunks:
        raise ValueError("No chunks provided for multiplication")

    # Check vector dimensions
    if vector.shape[0] != chunks[0].n_cols:
        raise ValueError(f"Vector length {vector.shape[0]} does not match matrix columns {chunks[0].n_cols}")

    # Calculate result size based on the largest end row of any chunk
    result_size = max(chunk.end_row for chunk in chunks)

    # Initialize result vector with a dtype wide enough for every chunk's products
    result_dtype = np.result_type(vector.dtype, *(chunk.data.dtype for chunk in chunks))
    result = np.zeros(result_size, dtype=result_dtype)

    # [T13] Direct computation through optimized pathways
    for i, chunk in enumerate(chunks):
        if verbose:
            print(f"Processing chunk {i} with {chunk.nnz} non-zeros")

        # [T5] Skip processing for empty chunks; their rows stay zero
        if chunk.nnz == 0:
            continue

        # [T10] Map results to output coordinates
        result[chunk.start_row:chunk.end_row] = chunk.process_with_vector(vector)

    if verbose:
        elapsed = time.time() - start_time
        print(f"Direct multiplication completed in {elapsed:.4f}s")

    return result
|
177 |
+
|
178 |
+
def process_matrix_file(storage_dir: str,
                        vector: np.ndarray,
                        verbose: bool = False) -> np.ndarray:
    """
    Load the chunks saved in storage_dir and multiply them with a vector.

    Ref: T13

    Args:
        storage_dir: Directory containing saved chunks
        vector: Vector to multiply with
        verbose: Whether to print debug information (passed to both steps)

    Returns:
        Result vector from the multiplication
    """
    # [T13] Load first, then hand the chunks straight to the multiplication.
    return matrix_vector_multiply(
        load_chunks(storage_dir, verbose=verbose),
        vector,
        verbose=verbose,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
scipy
|
3 |
+
gradio
|