DanielSwift committed on
Commit
a7585d3
·
1 Parent(s): 30b652f

FlexChunk Demo: Complete interactive application for sparse matrix-vector multiplication benchmarking

Browse files
Files changed (1) hide show
  1. app.py +211 -69
app.py CHANGED
@@ -83,7 +83,7 @@ def generate_sparse_matrix(size, density, challenging=False):
83
 
84
  # --- Benchmark Function (Placeholder) ---
85
 
86
- def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()):
87
  # This function will contain the main logic from test_vs_scipy.py
88
  # Adapted for Gradio inputs and outputs
89
  progress(0, desc="Starting Benchmark...")
@@ -122,60 +122,94 @@ def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()
122
  flex_compute_time = time.time() - flex_compute_start
123
  flex_total_time = load_time + flex_compute_time
124
 
125
- # --- SciPy Run ---
126
- progress(0.7, desc="Saving SciPy data...")
127
- scipy_temp_dir = storage_dir / "scipy_temp"
128
- scipy_temp_dir.mkdir(exist_ok=True)
129
- matrix_file = scipy_temp_dir / "matrix.npz"
130
- vector_file = scipy_temp_dir / "vector.npy"
131
-
132
- scipy_save_start = time.time()
133
- sparse.save_npz(matrix_file, matrix)
134
- np.save(vector_file, vector)
135
- scipy_save_time = time.time() - scipy_save_start
136
-
137
- progress(0.8, desc="Loading SciPy data...")
138
- scipy_load_start = time.time()
139
- loaded_matrix = sparse.load_npz(matrix_file)
140
- loaded_vector = np.load(vector_file)
141
- scipy_load_time = time.time() - scipy_load_start
142
-
143
- progress(0.9, desc="Running SciPy SpMV...")
144
- scipy_compute_start = time.time()
145
- scipy_result = loaded_matrix @ loaded_vector
146
- scipy_compute_time = time.time() - scipy_compute_start
147
- scipy_total_time = scipy_load_time + scipy_compute_time
148
-
149
- # --- Comparison ---
150
- progress(0.95, desc="Comparing results...")
151
- diff = np.abs(scipy_result - flex_result)
152
- max_diff = np.max(diff) if len(diff) > 0 else 0
153
- mean_diff = np.mean(diff) if len(diff) > 0 else 0
154
- is_close = np.allclose(scipy_result, flex_result, atol=1e-9) # Increased tolerance slightly
155
- comparison_result = f"✅ Results Match! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" if is_close else f"❌ Results Differ! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})"
156
-
 
 
 
 
 
 
 
 
 
 
157
  # --- Cleanup ---
158
  shutil.rmtree(storage_dir)
159
 
160
  progress(1.0, desc="Benchmark Complete")
161
 
162
  # --- Format Output ---
163
- results_summary = f"""
 
 
164
  {matrix_info}
165
 
166
- **FlexChunk Performance:**
167
- - Prepare Chunks Time: {prepare_time:.4f}s
168
- - Load Chunks Time: {load_time:.4f}s
169
- - Compute Time: {flex_compute_time:.4f}s
170
- - **Total (Load+Compute): {flex_total_time:.4f}s**
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- **SciPy Performance (Out-of-Core Emulation):**
173
- - Save Data Time: {scipy_save_time:.4f}s (For reference)
174
- - Load Data Time: {scipy_load_time:.4f}s
175
- - Compute Time: {scipy_compute_time:.4f}s
176
- - **Total (Load+Compute): {scipy_total_time:.4f}s**
177
 
178
- **Comparison:**
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  {comparison_result}
180
  """
181
 
@@ -183,38 +217,146 @@ def run_benchmark(size, density, num_chunks, challenging, progress=gr.Progress()
183
 
184
  # --- Gradio Interface ---
185
 
186
- with gr.Blocks() as demo:
187
  gr.Markdown("""
188
- # FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication (SpMV) Demo
189
 
190
- This demo compares the performance of FlexChunk against standard SciPy for SpMV,
191
- simulating an out-of-core scenario where the matrix doesn't fit entirely in memory.
192
 
193
- FlexChunk splits the matrix into smaller chunks, processing them sequentially to reduce peak memory usage.
194
- SciPy performance includes the time to save and load the matrix from disk to mimic this out-of-core access.
 
 
195
  """)
196
 
197
- with gr.Row():
198
- with gr.Column(scale=1):
199
- gr.Markdown("**Benchmark Parameters**")
200
- size_input = gr.Slider(label="Matrix Size (N x N)", minimum=100, maximum=50000, value=10000, step=100)
201
- # Max density adjusted to prevent excessive nnz for large matrices in demo
202
- density_input = gr.Slider(label="Matrix Density", minimum=0.00001, maximum=0.01, value=0.0001, step=0.00001, format="%.5f")
203
- chunks_input = gr.Slider(label="Number of Chunks", minimum=1, maximum=32, value=4, step=1)
204
- challenging_input = gr.Checkbox(label="Use Challenging Matrix (Extreme Values)", value=False)
205
- run_button = gr.Button("Run Benchmark", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- with gr.Column(scale=2):
208
- gr.Markdown("**Results**")
209
- output_textbox = gr.Markdown(label="Benchmark Summary")
210
-
211
- run_button.click(
212
- fn=run_benchmark,
213
- inputs=[size_input, density_input, chunks_input, challenging_input],
214
- outputs=[output_textbox]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  )
216
 
217
- gr.Markdown("--- Developed based on the [FlexChunk concept](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  # Launch the app
220
  if __name__ == "__main__":
 
83
 
84
  # --- Benchmark Function (Placeholder) ---
85
 
86
+ def run_benchmark(size, density, num_chunks, challenging, flex_only=False, progress=gr.Progress()):
87
  # This function will contain the main logic from test_vs_scipy.py
88
  # Adapted for Gradio inputs and outputs
89
  progress(0, desc="Starting Benchmark...")
 
122
  flex_compute_time = time.time() - flex_compute_start
123
  flex_total_time = load_time + flex_compute_time
124
 
125
+ # Estimate FlexChunk memory usage
126
+ max_chunk_size = max(chunk.data.nbytes + chunk.col_indices.nbytes + chunk.row_offsets.nbytes for chunk in chunks)
127
+ flex_operational_memory = max_chunk_size + vector.nbytes + (size * 8) # Chunk + vector + result vector
128
+ flex_memory_mb = flex_operational_memory / (1024*1024)
129
+
130
+ # --- SciPy Run (Optional) ---
131
+ if not flex_only:
132
+ progress(0.7, desc="Saving SciPy data...")
133
+ scipy_temp_dir = storage_dir / "scipy_temp"
134
+ scipy_temp_dir.mkdir(exist_ok=True)
135
+ matrix_file = scipy_temp_dir / "matrix.npz"
136
+ vector_file = scipy_temp_dir / "vector.npy"
137
+
138
+ scipy_save_start = time.time()
139
+ sparse.save_npz(matrix_file, matrix)
140
+ np.save(vector_file, vector)
141
+ scipy_save_time = time.time() - scipy_save_start
142
+
143
+ progress(0.8, desc="Loading SciPy data...")
144
+ scipy_load_start = time.time()
145
+ loaded_matrix = sparse.load_npz(matrix_file)
146
+ loaded_vector = np.load(vector_file)
147
+ scipy_load_time = time.time() - scipy_load_start
148
+
149
+ progress(0.9, desc="Running SciPy SpMV...")
150
+ scipy_compute_start = time.time()
151
+ scipy_result = loaded_matrix @ loaded_vector
152
+ scipy_compute_time = time.time() - scipy_compute_start
153
+ scipy_total_time = scipy_load_time + scipy_compute_time
154
+
155
+ # Estimate SciPy memory usage
156
+ scipy_memory = loaded_matrix.data.nbytes + loaded_matrix.indices.nbytes + loaded_matrix.indptr.nbytes + loaded_vector.nbytes
157
+ scipy_memory_mb = scipy_memory / (1024*1024)
158
+
159
+ # --- Comparison ---
160
+ progress(0.95, desc="Comparing results...")
161
+ diff = np.abs(scipy_result - flex_result)
162
+ max_diff = np.max(diff) if len(diff) > 0 else 0
163
+ mean_diff = np.mean(diff) if len(diff) > 0 else 0
164
+ is_close = np.allclose(scipy_result, flex_result, atol=1e-9) # Increased tolerance slightly
165
+ comparison_result = f"✅ Results Match! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" if is_close else f"❌ Results Differ! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})"
166
+
167
  # --- Cleanup ---
168
  shutil.rmtree(storage_dir)
169
 
170
  progress(1.0, desc="Benchmark Complete")
171
 
172
  # --- Format Output ---
173
+ if flex_only:
174
+ results_summary = f"""
175
+ ## Matrix Information
176
  {matrix_info}
177
 
178
+ ## FlexChunk Performance
179
+ | Stage | Time |
180
+ |-------|------|
181
+ | Prepare Chunks | {prepare_time:.4f}s |
182
+ | Load Chunks | {load_time:.4f}s |
183
+ | Compute | {flex_compute_time:.4f}s |
184
+ | **Total (Load+Compute)** | **{flex_total_time:.4f}s** |
185
+
186
+ ## Memory Usage
187
+ | Metric | Value |
188
+ |--------|-------|
189
+ | Peak RAM Usage | {flex_memory_mb:.2f} MB |
190
+ | Chunks | {num_chunks} |
191
+ """
192
+ else:
193
+ results_summary = f"""
194
+ ## Matrix Information
195
+ {matrix_info}
196
 
197
+ ## Performance Comparison
 
 
 
 
198
 
199
+ | Stage | FlexChunk | SciPy (Out-of-Core) |
200
+ |-------|-----------|---------------------|
201
+ | Data Preparation | {prepare_time:.4f}s | {scipy_save_time:.4f}s |
202
+ | Load Time | {load_time:.4f}s | {scipy_load_time:.4f}s |
203
+ | Compute Time | {flex_compute_time:.4f}s | {scipy_compute_time:.4f}s |
204
+ | **Total (Load+Compute)** | **{flex_total_time:.4f}s** | **{scipy_total_time:.4f}s** |
205
+
206
+ ## Memory Usage
207
+ | Metric | FlexChunk | SciPy |
208
+ |--------|-----------|-------|
209
+ | Peak RAM Usage | {flex_memory_mb:.2f} MB | {scipy_memory_mb:.2f} MB |
210
+ | Memory Ratio | 1.0x | {scipy_memory_mb/flex_memory_mb:.2f}x |
211
+
212
+ ## Comparison
213
  {comparison_result}
214
  """
215
 
 
217
 
218
  # --- Gradio Interface ---
219
 
220
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
221
  gr.Markdown("""
222
+ # FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication
223
 
224
+ This interactive demo showcases **FlexChunk**, an algorithm for performing Sparse Matrix-Vector Multiplication (SpMV) on matrices that may be too large to fit entirely in memory.
 
225
 
226
+ **Key Benefits:**
227
+ * Process matrices up to 100M×100M using only ~1.7GB RAM
228
+ * Near-linear scaling in both time and memory usage
229
+ * Outperforms traditional approaches for large out-of-core matrices
230
  """)
231
 
232
+ with gr.Tabs() as tabs:
233
+ # Standard mode tab
234
+ with gr.TabItem("Standard Mode"):
235
+ with gr.Row():
236
+ with gr.Column():
237
+ gr.Markdown("### Matrix Parameters")
238
+ standard_size = gr.Slider(
239
+ label="Matrix Size (N×N)",
240
+ minimum=1000,
241
+ maximum=200000,
242
+ value=10000,
243
+ step=1000,
244
+ info="Square matrix dimension (N×N)"
245
+ )
246
+ standard_density = gr.Slider(
247
+ label="Matrix Density",
248
+ minimum=0.00001,
249
+ maximum=0.01,
250
+ value=0.0001,
251
+ step=0.00001,
252
+ info="Fraction of non-zero elements (0.0001 = 0.01%)"
253
+ )
254
+ standard_chunks = gr.Slider(
255
+ label="Number of Chunks",
256
+ minimum=1,
257
+ maximum=32,
258
+ value=4,
259
+ step=1,
260
+ info="More chunks = less memory but more overhead"
261
+ )
262
+ standard_challenging = gr.Checkbox(
263
+ label="Use Challenging Matrix",
264
+ info="Includes extreme values and special patterns"
265
+ )
266
+ standard_flexonly = gr.Checkbox(
267
+ label="FlexChunk Only",
268
+ info="Skip SciPy comparison for better performance"
269
+ )
270
+ standard_button = gr.Button("Run Benchmark", variant="primary")
271
 
272
+ standard_output = gr.Markdown()
273
+
274
+ # Advanced mode tab
275
+ with gr.TabItem("Advanced Mode"):
276
+ with gr.Row():
277
+ with gr.Column():
278
+ gr.Markdown("### Large Matrix Parameters")
279
+ gr.Markdown("""
280
+ ⚠️ **Warning**: Processing time varies with matrix size:
281
+ - 1M×1M matrices: ~1 second
282
+ - 10M×10M matrices: ~10 seconds
283
+ - 100M×100M matrices: ~1 minute 47 seconds
284
+
285
+ For large matrices, FlexChunk-only mode is automatically enabled.
286
+ """)
287
+
288
+ advanced_size = gr.Slider(
289
+ label="Matrix Size (N×N)",
290
+ minimum=50000,
291
+ maximum=300000000,
292
+ value=100000,
293
+ step=50000,
294
+ info="Square matrix dimension - up to 300M×300M (extremely large values will take significant time)"
295
+ )
296
+ advanced_density = gr.Slider(
297
+ label="Matrix Density",
298
+ minimum=0.0000001,
299
+ maximum=0.001,
300
+ value=0.000001,
301
+ step=0.0000001,
302
+ info="Use lower density for very large matrices"
303
+ )
304
+ advanced_chunks = gr.Slider(
305
+ label="Number of Chunks",
306
+ minimum=4,
307
+ maximum=100,
308
+ value=10,
309
+ step=1,
310
+ info="More chunks recommended for larger matrices"
311
+ )
312
+ advanced_challenging = gr.Checkbox(
313
+ label="Use Challenging Matrix",
314
+ info="Includes extreme values and special patterns"
315
+ )
316
+
317
+ # Force FlexChunk only for advanced mode
318
+ gr.Markdown("*SciPy comparison disabled for large matrices*")
319
+ advanced_button = gr.Button("Run Advanced Benchmark", variant="primary")
320
+
321
+ advanced_output = gr.Markdown()
322
+
323
+ # Event handlers
324
+ standard_button.click(
325
+ fn=run_benchmark,
326
+ inputs=[standard_size, standard_density, standard_chunks, standard_challenging, standard_flexonly],
327
+ outputs=standard_output
328
  )
329
 
330
+ advanced_button.click(
331
+ fn=lambda size, density, chunks, challenging: run_benchmark(size, density, chunks, challenging, True),
332
+ inputs=[advanced_size, advanced_density, advanced_chunks, advanced_challenging],
333
+ outputs=advanced_output
334
+ )
335
+
336
+ gr.Markdown("""
337
+ ---
338
+ ### About FlexChunk
339
+
340
+ FlexChunk enables processing matrices that would normally exceed RAM capacity by dividing them into manageable chunks.
341
+
342
+ **Links:**
343
+ - Read more in the [original article](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb)
344
+ - View source code on [GitHub](https://github.com/DanielSwift1992/FlexChunk)
345
+
346
+ ---
347
+ ### Benchmark Results
348
+
349
+ Actual performance measurements from our tests:
350
+
351
+ | Matrix Size | Non-zero Elements | Total Time | Peak RAM Usage |
352
+ |-----------------|-------------------|---------------|----------------|
353
+ | 1.0M × 1.0M | 1.2M | 1.07 s | 17.00 MB |
354
+ | 10.0M × 10.0M | 12.0M | 10.21 s | 170.00 MB |
355
+ | 50.0M × 50.0M | 62.5M | 55.27 s | 850.00 MB |
356
+ | 100.0M × 100.0M | 120.0M | 1 min 47.1 s | 1.70 GB |
357
+
358
+ The algorithm scales nearly linearly and can theoretically handle even larger matrices (up to 300M×300M), with proportionally increased processing time and memory usage.
359
+ """)
360
 
361
  # Launch the app
362
  if __name__ == "__main__":