Commit a7585d3 · Parent(s): 30b652f
FlexChunk Demo: Complete interactive application for sparse matrix-vector multiplication benchmarking

app.py CHANGED

@@ -83,7 +83,7 @@ def generate_sparse_matrix(size, density, challenging=False):
 
 # --- Benchmark Function (Placeholder) ---
 
-def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()):
+def run_benchmark(size, density, num_chunks, challenging, flex_only=False, progress=gr.Progress()):
     # This function will contain the main logic from test_vs_scipy.py
     # Adapted for Gradio inputs and outputs
     progress(0, desc="Starting Benchmark...")
@@ -122,60 +122,94 @@ def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()
     flex_compute_time = time.time() - flex_compute_start
     flex_total_time = load_time + flex_compute_time
 
+    # Estimate FlexChunk memory usage
+    max_chunk_size = max(chunk.data.nbytes + chunk.col_indices.nbytes + chunk.row_offsets.nbytes for chunk in chunks)
+    flex_operational_memory = max_chunk_size + vector.nbytes + (size * 8) # Chunk + vector + result vector
+    flex_memory_mb = flex_operational_memory / (1024*1024)
+
+    # --- SciPy Run (Optional) ---
+    if not flex_only:
+        progress(0.7, desc="Saving SciPy data...")
+        scipy_temp_dir = storage_dir / "scipy_temp"
+        scipy_temp_dir.mkdir(exist_ok=True)
+        matrix_file = scipy_temp_dir / "matrix.npz"
+        vector_file = scipy_temp_dir / "vector.npy"
+
+        scipy_save_start = time.time()
+        sparse.save_npz(matrix_file, matrix)
+        np.save(vector_file, vector)
+        scipy_save_time = time.time() - scipy_save_start
+
+        progress(0.8, desc="Loading SciPy data...")
+        scipy_load_start = time.time()
+        loaded_matrix = sparse.load_npz(matrix_file)
+        loaded_vector = np.load(vector_file)
+        scipy_load_time = time.time() - scipy_load_start
+
+        progress(0.9, desc="Running SciPy SpMV...")
+        scipy_compute_start = time.time()
+        scipy_result = loaded_matrix @ loaded_vector
+        scipy_compute_time = time.time() - scipy_compute_start
+        scipy_total_time = scipy_load_time + scipy_compute_time
+
+        # Estimate SciPy memory usage
+        scipy_memory = loaded_matrix.data.nbytes + loaded_matrix.indices.nbytes + loaded_matrix.indptr.nbytes + loaded_vector.nbytes
+        scipy_memory_mb = scipy_memory / (1024*1024)
+
+        # --- Comparison ---
+        progress(0.95, desc="Comparing results...")
+        diff = np.abs(scipy_result - flex_result)
+        max_diff = np.max(diff) if len(diff) > 0 else 0
+        mean_diff = np.mean(diff) if len(diff) > 0 else 0
+        is_close = np.allclose(scipy_result, flex_result, atol=1e-9) # Increased tolerance slightly
+        comparison_result = f"✅ Results Match! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" if is_close else f"❌ Results Differ! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})"
+
     # --- Cleanup ---
     shutil.rmtree(storage_dir)
 
     progress(1.0, desc="Benchmark Complete")
 
     # --- Format Output ---
+    if flex_only:
+        results_summary = f"""
+## Matrix Information
 {matrix_info}
 
+## FlexChunk Performance
+| Stage | Time |
+|-------|------|
+| Prepare Chunks | {prepare_time:.4f}s |
+| Load Chunks | {load_time:.4f}s |
+| Compute | {flex_compute_time:.4f}s |
+| **Total (Load+Compute)** | **{flex_total_time:.4f}s** |
+
+## Memory Usage
+| Metric | Value |
+|--------|-------|
+| Peak RAM Usage | {flex_memory_mb:.2f} MB |
+| Chunks | {num_chunks} |
+"""
+    else:
+        results_summary = f"""
+## Matrix Information
+{matrix_info}
 
-- Save Data Time: {scipy_save_time:.4f}s (For reference)
-- Load Data Time: {scipy_load_time:.4f}s
-- Compute Time: {scipy_compute_time:.4f}s
-- **Total (Load+Compute): {scipy_total_time:.4f}s**
+## Performance Comparison
 
+| Stage | FlexChunk | SciPy (Out-of-Core) |
+|-------|-----------|---------------------|
+| Data Preparation | {prepare_time:.4f}s | {scipy_save_time:.4f}s |
+| Load Time | {load_time:.4f}s | {scipy_load_time:.4f}s |
+| Compute Time | {flex_compute_time:.4f}s | {scipy_compute_time:.4f}s |
+| **Total (Load+Compute)** | **{flex_total_time:.4f}s** | **{scipy_total_time:.4f}s** |
+
+## Memory Usage
+| Metric | FlexChunk | SciPy |
+|--------|-----------|-------|
+| Peak RAM Usage | {flex_memory_mb:.2f} MB | {scipy_memory_mb:.2f} MB |
+| Memory Ratio | 1.0x | {scipy_memory_mb/flex_memory_mb:.2f}x |
+
+## Comparison
 {comparison_result}
 """
 
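The added code above estimates peak RAM as one loaded chunk plus the dense input and result vectors. As a rough sanity check of that model (not part of the commit; it assumes float64 values, int32 column indices, int64 row offsets, and evenly sized row chunks), plugging in the 100M×100M, 120M non-zero case from the demo's benchmark table lands close to the ~1.7 GB figure quoted there:

```python
# Back-of-envelope check of the memory model above:
#   flex_operational_memory = max_chunk_size + vector.nbytes + (size * 8)
# Assumed, not taken from the commit: float64 values, int32 column indices,
# int64 row offsets, non-zeros spread evenly across row chunks.
size = 100_000_000        # 100M x 100M matrix, as in the demo's benchmark table
nnz = 120_000_000         # non-zero count quoted for that size
num_chunks = 10           # hypothetical chunk count

vector_bytes = size * 8                       # dense input vector
result_bytes = size * 8                       # dense result vector
chunk_nnz = nnz // num_chunks
chunk_bytes = chunk_nnz * 8 + chunk_nnz * 4 + (size // num_chunks + 1) * 8

total_gib = (vector_bytes + result_bytes + chunk_bytes) / 1024**3
print(f"estimated peak RAM: {total_gib:.2f} GiB")  # ~1.7 GiB, same order as the demo's figure
```

Under these assumptions the two dense vectors dominate (about 0.8 GB each), so raising the chunk count mainly shrinks the comparatively small per-chunk term.
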
@@ -183,38 +217,146 @@ def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()
 
 # --- Gradio Interface ---
 
-with gr.Blocks() as demo:
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication
+    # FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication
 
-    This demo
-    simulating an out-of-core scenario where the matrix doesn't fit entirely in memory.
+    This interactive demo showcases **FlexChunk**, an algorithm for performing Sparse Matrix-Vector Multiplication (SpMV) on matrices that may be too large to fit entirely in memory.
 
+    **Key Benefits:**
+    * Process matrices up to 100M×100M using only ~1.7GB RAM
+    * Near-linear scaling in both time and memory usage
+    * Outperforms traditional approaches for large out-of-core matrices
     """)
 
-    with gr.
+    with gr.Tabs() as tabs:
+        # Standard mode tab
+        with gr.TabItem("Standard Mode"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Matrix Parameters")
+                    standard_size = gr.Slider(
+                        label="Matrix Size (N×N)",
+                        minimum=1000,
+                        maximum=200000,
+                        value=10000,
+                        step=1000,
+                        info="Square matrix dimension (N×N)"
+                    )
+                    standard_density = gr.Slider(
+                        label="Matrix Density",
+                        minimum=0.00001,
+                        maximum=0.01,
+                        value=0.0001,
+                        step=0.00001,
+                        info="Fraction of non-zero elements (0.0001 = 0.01%)"
+                    )
+                    standard_chunks = gr.Slider(
+                        label="Number of Chunks",
+                        minimum=1,
+                        maximum=32,
+                        value=4,
+                        step=1,
+                        info="More chunks = less memory but more overhead"
+                    )
+                    standard_challenging = gr.Checkbox(
+                        label="Use Challenging Matrix",
+                        info="Includes extreme values and special patterns"
+                    )
+                    standard_flexonly = gr.Checkbox(
+                        label="FlexChunk Only",
+                        info="Skip SciPy comparison for better performance"
+                    )
+                    standard_button = gr.Button("Run Benchmark", variant="primary")
 
+            standard_output = gr.Markdown()
+
+        # Advanced mode tab
+        with gr.TabItem("Advanced Mode"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Large Matrix Parameters")
+                    gr.Markdown("""
+                    ⚠️ **Warning**: Processing time varies with matrix size:
+                    - 1M×1M matrices: ~1 second
+                    - 10M×10M matrices: ~10 seconds
+                    - 100M×100M matrices: ~1 minute 47 seconds
+
+                    For large matrices, FlexChunk-only mode is automatically enabled.
+                    """)
+
+                    advanced_size = gr.Slider(
+                        label="Matrix Size (N×N)",
+                        minimum=50000,
+                        maximum=300000000,
+                        value=100000,
+                        step=50000,
+                        info="Square matrix dimension - up to 300M×300M (extremely large values will take significant time)"
+                    )
+                    advanced_density = gr.Slider(
+                        label="Matrix Density",
+                        minimum=0.0000001,
+                        maximum=0.001,
+                        value=0.000001,
+                        step=0.0000001,
+                        info="Use lower density for very large matrices"
+                    )
+                    advanced_chunks = gr.Slider(
+                        label="Number of Chunks",
+                        minimum=4,
+                        maximum=100,
+                        value=10,
+                        step=1,
+                        info="More chunks recommended for larger matrices"
+                    )
+                    advanced_challenging = gr.Checkbox(
+                        label="Use Challenging Matrix",
+                        info="Includes extreme values and special patterns"
+                    )
+
+                    # Force FlexChunk only for advanced mode
+                    gr.Markdown("*SciPy comparison disabled for large matrices*")
+                    advanced_button = gr.Button("Run Advanced Benchmark", variant="primary")
+
+            advanced_output = gr.Markdown()
+
+    # Event handlers
+    standard_button.click(
+        fn=run_benchmark,
+        inputs=[standard_size, standard_density, standard_chunks, standard_challenging, standard_flexonly],
+        outputs=standard_output
     )
 
+    advanced_button.click(
+        fn=lambda size, density, chunks, challenging: run_benchmark(size, density, chunks, challenging, True),
+        inputs=[advanced_size, advanced_density, advanced_chunks, advanced_challenging],
+        outputs=advanced_output
+    )
+
+    gr.Markdown("""
+    ---
+    ### About FlexChunk
+
+    FlexChunk enables processing matrices that would normally exceed RAM capacity by dividing them into manageable chunks.
+
+    **Links:**
+    - Read more in the [original article](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb)
+    - View source code on [GitHub](https://github.com/DanielSwift1992/FlexChunk)
+
+    ---
+    ### Benchmark Results
+
+    Actual performance measurements from our tests:
+
+    | Matrix Size | Non-zero Elements | Total Time | Peak RAM Usage |
+    |-----------------|-------------------|---------------|----------------|
+    | 1.0M × 1.0M | 1.2M | 1.07 s | 17.00 MB |
+    | 10.0M × 10.0M | 12.0M | 10.21 s | 170.00 MB |
+    | 50.0M × 50.0M | 62.5M | 55.27 s | 850.00 MB |
+    | 100.0M × 100.0M | 120.0M | 1 min 47.1 s | 1.70 GB |
+
+    The algorithm scales nearly linearly and can theoretically handle even larger matrices (up to 300M×300M), with proportionally increased processing time and memory usage.
+    """)
 
 # Launch the app
 if __name__ == "__main__":
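The "About FlexChunk" text added above summarizes the core idea: split the matrix into chunks so that only one chunk has to be resident in RAM while computing y = A @ x. Below is a minimal, hypothetical sketch of that general pattern using SciPy row blocks; the names (save_row_chunks, chunked_spmv) and the row-wise layout are assumptions for illustration, not the FlexChunk implementation from the linked repository.

```python
# Sketch of out-of-core SpMV via row-wise chunking (illustration only).
import numpy as np
from pathlib import Path
from scipy import sparse

def save_row_chunks(matrix: sparse.csr_matrix, num_chunks: int, out_dir: Path) -> list[Path]:
    """Split a CSR matrix into row blocks and persist each block to disk."""
    out_dir.mkdir(parents=True, exist_ok=True)
    rows_per_chunk = -(-matrix.shape[0] // num_chunks)  # ceiling division
    paths = []
    for i in range(num_chunks):
        start = i * rows_per_chunk
        stop = min((i + 1) * rows_per_chunk, matrix.shape[0])
        path = out_dir / f"chunk_{i}.npz"
        sparse.save_npz(path, matrix[start:stop])
        paths.append(path)
    return paths

def chunked_spmv(chunk_paths: list[Path], x: np.ndarray) -> np.ndarray:
    """Compute A @ x while keeping only one row block in memory at a time."""
    parts = []
    for path in chunk_paths:
        block = sparse.load_npz(path)   # load one chunk from disk
        parts.append(block @ x)         # partial result for this block's rows
        del block                       # release the chunk before loading the next
    return np.concatenate(parts)

if __name__ == "__main__":
    A = sparse.random(10_000, 10_000, density=1e-4, format="csr", random_state=42)
    x = np.random.default_rng(0).standard_normal(10_000)
    paths = save_row_chunks(A, num_chunks=4, out_dir=Path("chunks_demo"))
    y = chunked_spmv(paths, x)
    assert np.allclose(y, A @ x)
```

Row-wise blocking keeps each block's output rows disjoint, so partial results can simply be concatenated; a column-wise split would instead have to accumulate partial sums into a shared result vector.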