Spaces:
Sleeping
Sleeping
FlexChunk Demo: Complete interactive application for sparse matrix-vector multiplication benchmarking
a7585d3
import gradio as gr | |
import numpy as np | |
import scipy.sparse as sparse | |
import time | |
import os | |
import shutil | |
import math | |
import sys | |
from pathlib import Path | |
# Assuming flex_chunk.py and matrix_multiply.py are in the same directory | |
from flex_chunk import FlexChunk, save_chunk, load_chunk | |
from matrix_multiply import prepare_chunks, load_chunks, matrix_vector_multiply | |
# --- Matrix Generation (copied from test_vs_scipy.py) --- | |
def generate_sparse_matrix(size, density, challenging=False): | |
""" | |
Generate a sparse test matrix with optional challenging patterns. | |
Args: | |
size: Matrix size (n x n) | |
density: Target density | |
challenging: Whether to include challenging patterns and extreme values | |
Returns: | |
A scipy.sparse.csr_matrix | |
""" | |
# Calculate number of non-zeros | |
nnz = int(size * size * density) | |
if nnz == 0: # Ensure at least one non-zero element if density is very low | |
nnz = 1 | |
if not challenging: | |
# Simple random matrix | |
rows = np.random.randint(0, size, nnz) | |
cols = np.random.randint(0, size, nnz) | |
data = np.random.rand(nnz) | |
# Ensure the matrix actually has the specified size if nnz is small | |
if nnz < size: | |
# Add diagonal elements to ensure size | |
diag_indices = np.arange(min(nnz, size)) | |
rows = np.concatenate([rows, diag_indices]) | |
cols = np.concatenate([cols, diag_indices]) | |
data = np.concatenate([data, np.ones(len(diag_indices))]) # Use 1 for diagonal | |
matrix = sparse.csr_matrix((data, (rows, cols)), shape=(size, size)) | |
matrix.sum_duplicates() # Consolidate duplicate entries | |
return matrix | |
# --- Challenging matrix with specific patterns --- | |
# Base random matrix (80% of non-zeros) | |
base_nnz = int(nnz * 0.8) | |
rows = np.random.randint(0, size, base_nnz) | |
cols = np.random.randint(0, size, base_nnz) | |
data = np.random.rand(base_nnz) | |
# Add diagonal elements (10% of non-zeros) | |
diag_nnz = int(nnz * 0.1) | |
diag_indices = np.random.choice(size, diag_nnz, replace=False) | |
# Add extreme values (10% of non-zeros) | |
extreme_nnz = max(0, nnz - base_nnz - diag_nnz) # Ensure non-negative | |
extreme_rows = np.random.randint(0, size, extreme_nnz) | |
extreme_cols = np.random.randint(0, size, extreme_nnz) | |
# Mix of very large and very small values | |
extreme_data = np.concatenate([ | |
np.random.uniform(1e6, 1e9, extreme_nnz // 2), | |
np.random.uniform(1e-9, 1e-6, extreme_nnz - extreme_nnz // 2) | |
]) if extreme_nnz > 0 else np.array([]) | |
if extreme_nnz > 0: | |
np.random.shuffle(extreme_data) | |
# Combine all components | |
all_rows = np.concatenate([rows, diag_indices, extreme_rows]) | |
all_cols = np.concatenate([cols, diag_indices, extreme_cols]) | |
all_data = np.concatenate([data, np.random.rand(diag_nnz), extreme_data]) | |
matrix = sparse.csr_matrix((all_data, (all_rows, all_cols)), shape=(size, size)) | |
matrix.sum_duplicates() # Consolidate duplicate entries | |
return matrix | |
# --- Benchmark Function (Placeholder) --- | |
def run_benchmark(size, density, num_chunks, challenging, flex_only=False, progress=gr.Progress()): | |
# This function will contain the main logic from test_vs_scipy.py | |
# Adapted for Gradio inputs and outputs | |
progress(0, desc="Starting Benchmark...") | |
time.sleep(1) # Placeholder | |
# 1. Setup storage | |
storage_dir = Path("./flex_chunk_temp_space") | |
if storage_dir.exists(): | |
shutil.rmtree(storage_dir) | |
storage_dir.mkdir(exist_ok=True) | |
progress(0.1, desc="Generating Matrix...") | |
# 2. Generate matrix and vector | |
matrix = generate_sparse_matrix(size, density, challenging) | |
vector = np.random.rand(size) | |
actual_nnz = matrix.nnz | |
actual_density = actual_nnz / (size * size) if size > 0 else 0 | |
matrix_info = f"Matrix: {size}x{size}, Target Density: {density:.6f}, Actual Density: {actual_density:.6f}, NNZ: {actual_nnz}" | |
print(matrix_info) # For debugging in Hugging Face console | |
# --- FlexChunk Run --- | |
progress(0.2, desc="Preparing FlexChunks...") | |
prepare_start = time.time() | |
prepare_chunks(matrix, num_chunks, str(storage_dir), verbose=False) | |
prepare_time = time.time() - prepare_start | |
progress(0.4, desc="Loading FlexChunks...") | |
load_start = time.time() | |
chunks = load_chunks(str(storage_dir), verbose=False) | |
load_time = time.time() - load_start | |
progress(0.6, desc="Running FlexChunk SpMV...") | |
flex_compute_start = time.time() | |
flex_result = matrix_vector_multiply(chunks, vector, verbose=False) | |
flex_compute_time = time.time() - flex_compute_start | |
flex_total_time = load_time + flex_compute_time | |
# Estimate FlexChunk memory usage | |
max_chunk_size = max(chunk.data.nbytes + chunk.col_indices.nbytes + chunk.row_offsets.nbytes for chunk in chunks) | |
flex_operational_memory = max_chunk_size + vector.nbytes + (size * 8) # Chunk + vector + result vector | |
flex_memory_mb = flex_operational_memory / (1024*1024) | |
# --- SciPy Run (Optional) --- | |
if not flex_only: | |
progress(0.7, desc="Saving SciPy data...") | |
scipy_temp_dir = storage_dir / "scipy_temp" | |
scipy_temp_dir.mkdir(exist_ok=True) | |
matrix_file = scipy_temp_dir / "matrix.npz" | |
vector_file = scipy_temp_dir / "vector.npy" | |
scipy_save_start = time.time() | |
sparse.save_npz(matrix_file, matrix) | |
np.save(vector_file, vector) | |
scipy_save_time = time.time() - scipy_save_start | |
progress(0.8, desc="Loading SciPy data...") | |
scipy_load_start = time.time() | |
loaded_matrix = sparse.load_npz(matrix_file) | |
loaded_vector = np.load(vector_file) | |
scipy_load_time = time.time() - scipy_load_start | |
progress(0.9, desc="Running SciPy SpMV...") | |
scipy_compute_start = time.time() | |
scipy_result = loaded_matrix @ loaded_vector | |
scipy_compute_time = time.time() - scipy_compute_start | |
scipy_total_time = scipy_load_time + scipy_compute_time | |
# Estimate SciPy memory usage | |
scipy_memory = loaded_matrix.data.nbytes + loaded_matrix.indices.nbytes + loaded_matrix.indptr.nbytes + loaded_vector.nbytes | |
scipy_memory_mb = scipy_memory / (1024*1024) | |
# --- Comparison --- | |
progress(0.95, desc="Comparing results...") | |
diff = np.abs(scipy_result - flex_result) | |
max_diff = np.max(diff) if len(diff) > 0 else 0 | |
mean_diff = np.mean(diff) if len(diff) > 0 else 0 | |
is_close = np.allclose(scipy_result, flex_result, atol=1e-9) # Increased tolerance slightly | |
comparison_result = f"✅ Results Match! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" if is_close else f"❌ Results Differ! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" | |
# --- Cleanup --- | |
shutil.rmtree(storage_dir) | |
progress(1.0, desc="Benchmark Complete") | |
# --- Format Output --- | |
if flex_only: | |
results_summary = f""" | |
## Matrix Information | |
{matrix_info} | |
## FlexChunk Performance | |
| Stage | Time | | |
|-------|------| | |
| Prepare Chunks | {prepare_time:.4f}s | | |
| Load Chunks | {load_time:.4f}s | | |
| Compute | {flex_compute_time:.4f}s | | |
| **Total (Load+Compute)** | **{flex_total_time:.4f}s** | | |
## Memory Usage | |
| Metric | Value | | |
|--------|-------| | |
| Peak RAM Usage | {flex_memory_mb:.2f} MB | | |
| Chunks | {num_chunks} | | |
""" | |
else: | |
results_summary = f""" | |
## Matrix Information | |
{matrix_info} | |
## Performance Comparison | |
| Stage | FlexChunk | SciPy (Out-of-Core) | | |
|-------|-----------|---------------------| | |
| Data Preparation | {prepare_time:.4f}s | {scipy_save_time:.4f}s | | |
| Load Time | {load_time:.4f}s | {scipy_load_time:.4f}s | | |
| Compute Time | {flex_compute_time:.4f}s | {scipy_compute_time:.4f}s | | |
| **Total (Load+Compute)** | **{flex_total_time:.4f}s** | **{scipy_total_time:.4f}s** | | |
## Memory Usage | |
| Metric | FlexChunk | SciPy | | |
|--------|-----------|-------| | |
| Peak RAM Usage | {flex_memory_mb:.2f} MB | {scipy_memory_mb:.2f} MB | | |
| Memory Ratio | 1.0x | {scipy_memory_mb/flex_memory_mb:.2f}x | | |
## Comparison | |
{comparison_result} | |
""" | |
return results_summary | |
# --- Gradio Interface --- | |
with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
gr.Markdown(""" | |
# FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication | |
This interactive demo showcases **FlexChunk**, an algorithm for performing Sparse Matrix-Vector Multiplication (SpMV) on matrices that may be too large to fit entirely in memory. | |
**Key Benefits:** | |
* Process matrices up to 100M×100M using only ~1.7GB RAM | |
* Near-linear scaling in both time and memory usage | |
* Outperforms traditional approaches for large out-of-core matrices | |
""") | |
with gr.Tabs() as tabs: | |
# Standard mode tab | |
with gr.TabItem("Standard Mode"): | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown("### Matrix Parameters") | |
standard_size = gr.Slider( | |
label="Matrix Size (N×N)", | |
minimum=1000, | |
maximum=200000, | |
value=10000, | |
step=1000, | |
info="Square matrix dimension (N×N)" | |
) | |
standard_density = gr.Slider( | |
label="Matrix Density", | |
minimum=0.00001, | |
maximum=0.01, | |
value=0.0001, | |
step=0.00001, | |
info="Fraction of non-zero elements (0.0001 = 0.01%)" | |
) | |
standard_chunks = gr.Slider( | |
label="Number of Chunks", | |
minimum=1, | |
maximum=32, | |
value=4, | |
step=1, | |
info="More chunks = less memory but more overhead" | |
) | |
standard_challenging = gr.Checkbox( | |
label="Use Challenging Matrix", | |
info="Includes extreme values and special patterns" | |
) | |
standard_flexonly = gr.Checkbox( | |
label="FlexChunk Only", | |
info="Skip SciPy comparison for better performance" | |
) | |
standard_button = gr.Button("Run Benchmark", variant="primary") | |
standard_output = gr.Markdown() | |
# Advanced mode tab | |
with gr.TabItem("Advanced Mode"): | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown("### Large Matrix Parameters") | |
gr.Markdown(""" | |
⚠️ **Warning**: Processing time varies with matrix size: | |
- 1M×1M matrices: ~1 second | |
- 10M×10M matrices: ~10 seconds | |
- 100M×100M matrices: ~1 minute 47 seconds | |
For large matrices, FlexChunk-only mode is automatically enabled. | |
""") | |
advanced_size = gr.Slider( | |
label="Matrix Size (N×N)", | |
minimum=50000, | |
maximum=300000000, | |
value=100000, | |
step=50000, | |
info="Square matrix dimension - up to 300M×300M (extremely large values will take significant time)" | |
) | |
advanced_density = gr.Slider( | |
label="Matrix Density", | |
minimum=0.0000001, | |
maximum=0.001, | |
value=0.000001, | |
step=0.0000001, | |
info="Use lower density for very large matrices" | |
) | |
advanced_chunks = gr.Slider( | |
label="Number of Chunks", | |
minimum=4, | |
maximum=100, | |
value=10, | |
step=1, | |
info="More chunks recommended for larger matrices" | |
) | |
advanced_challenging = gr.Checkbox( | |
label="Use Challenging Matrix", | |
info="Includes extreme values and special patterns" | |
) | |
# Force FlexChunk only for advanced mode | |
gr.Markdown("*SciPy comparison disabled for large matrices*") | |
advanced_button = gr.Button("Run Advanced Benchmark", variant="primary") | |
advanced_output = gr.Markdown() | |
# Event handlers | |
standard_button.click( | |
fn=run_benchmark, | |
inputs=[standard_size, standard_density, standard_chunks, standard_challenging, standard_flexonly], | |
outputs=standard_output | |
) | |
advanced_button.click( | |
fn=lambda size, density, chunks, challenging: run_benchmark(size, density, chunks, challenging, True), | |
inputs=[advanced_size, advanced_density, advanced_chunks, advanced_challenging], | |
outputs=advanced_output | |
) | |
gr.Markdown(""" | |
--- | |
### About FlexChunk | |
FlexChunk enables processing matrices that would normally exceed RAM capacity by dividing them into manageable chunks. | |
**Links:** | |
- Read more in the [original article](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb) | |
- View source code on [GitHub](https://github.com/DanielSwift1992/FlexChunk) | |
--- | |
### Benchmark Results | |
Actual performance measurements from our tests: | |
| Matrix Size | Non-zero Elements | Total Time | Peak RAM Usage | | |
|-----------------|-------------------|---------------|----------------| | |
| 1.0M × 1.0M | 1.2M | 1.07 s | 17.00 MB | | |
| 10.0M × 10.0M | 12.0M | 10.21 s | 170.00 MB | | |
| 50.0M × 50.0M | 62.5M | 55.27 s | 850.00 MB | | |
| 100.0M × 100.0M | 120.0M | 1 min 47.1 s | 1.70 GB | | |
The algorithm scales nearly linearly and can theoretically handle even larger matrices (up to 300M×300M), with proportionally increased processing time and memory usage. | |
""") | |
# Launch the app | |
if __name__ == "__main__": | |
demo.launch() |