import gradio as gr
import numpy as np
import scipy.sparse as sparse
import time
import shutil
from pathlib import Path

# Assuming flex_chunk.py and matrix_multiply.py are in the same directory
from flex_chunk import FlexChunk, save_chunk, load_chunk
from matrix_multiply import prepare_chunks, load_chunks, matrix_vector_multiply
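
# Out-of-core SpMV pipeline used by run_benchmark below:
#   1. prepare_chunks(matrix, num_chunks, storage_dir) - split the matrix into chunks
#      and persist them to disk
#   2. load_chunks(storage_dir)                        - read the chunks back from disk
#   3. matrix_vector_multiply(chunks, vector)          - multiply chunk by chunk, so
#      (per the memory estimate below) roughly one chunk plus the input and result
#      vectors need to be resident in RAM at a time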

# --- Matrix Generation (copied from test_vs_scipy.py) ---

def generate_sparse_matrix(size, density, challenging=False):
    """
    Generate a sparse test matrix with optional challenging patterns.
    
    Args:
        size: Matrix size (n x n)
        density: Target density
        challenging: Whether to include challenging patterns and extreme values
        
    Returns:
        A scipy.sparse.csr_matrix
    """
    # Calculate number of non-zeros
    nnz = int(size * size * density)
    if nnz == 0: # Ensure at least one non-zero element if density is very low
        nnz = 1 
        
    if not challenging:
        # Simple random matrix
        rows = np.random.randint(0, size, nnz)
        cols = np.random.randint(0, size, nnz)
        data = np.random.rand(nnz)
        # When nnz is small, add entries along the diagonal so the non-zeros are
        # spread across the matrix rather than relying on a handful of random positions
        if nnz < size:
            diag_indices = np.arange(min(nnz, size))
            rows = np.concatenate([rows, diag_indices])
            cols = np.concatenate([cols, diag_indices])
            data = np.concatenate([data, np.ones(len(diag_indices))])  # Use 1.0 on the diagonal
        
        matrix = sparse.csr_matrix((data, (rows, cols)), shape=(size, size))
        matrix.sum_duplicates() # Consolidate duplicate entries
        return matrix
    
    # --- Challenging matrix with specific patterns ---
    # Base random matrix (80% of non-zeros)
    base_nnz = int(nnz * 0.8)
    rows = np.random.randint(0, size, base_nnz)
    cols = np.random.randint(0, size, base_nnz)
    data = np.random.rand(base_nnz)
    
    # Add diagonal elements (10% of non-zeros)
    diag_nnz = int(nnz * 0.1)
    diag_indices = np.random.choice(size, diag_nnz, replace=False)
    
    # Add extreme values (10% of non-zeros)
    extreme_nnz = max(0, nnz - base_nnz - diag_nnz) # Ensure non-negative
    extreme_rows = np.random.randint(0, size, extreme_nnz)
    extreme_cols = np.random.randint(0, size, extreme_nnz)
    
    # Mix of very large and very small values
    extreme_data = np.concatenate([
        np.random.uniform(1e6, 1e9, extreme_nnz // 2),
        np.random.uniform(1e-9, 1e-6, extreme_nnz - extreme_nnz // 2)
    ]) if extreme_nnz > 0 else np.array([])
    if extreme_nnz > 0:
        np.random.shuffle(extreme_data)
    
    # Combine all components
    all_rows = np.concatenate([rows, diag_indices, extreme_rows])
    all_cols = np.concatenate([cols, diag_indices, extreme_cols])
    all_data = np.concatenate([data, np.random.rand(diag_nnz), extreme_data])
    
    matrix = sparse.csr_matrix((all_data, (all_rows, all_cols)), shape=(size, size))
    matrix.sum_duplicates() # Consolidate duplicate entries
    return matrix
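
# Example usage of generate_sparse_matrix (illustrative only, not executed by the app):
#   m = generate_sparse_matrix(10_000, 1e-4)                         # ~10k random non-zeros
#   m_hard = generate_sparse_matrix(10_000, 1e-4, challenging=True)  # adds extreme values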

# --- Benchmark Function ---

def run_benchmark(size, density, num_chunks, challenging, flex_only=False, progress=gr.Progress()):
    # Main benchmark logic adapted from test_vs_scipy.py for Gradio inputs and outputs
    progress(0, desc="Starting Benchmark...")
    
    # 1. Setup storage
    storage_dir = Path("./flex_chunk_temp_space")
    if storage_dir.exists():
        shutil.rmtree(storage_dir)
    storage_dir.mkdir(exist_ok=True)
    
    progress(0.1, desc="Generating Matrix...")
    # 2. Generate matrix and vector
    matrix = generate_sparse_matrix(size, density, challenging)
    vector = np.random.rand(size)
    actual_nnz = matrix.nnz
    actual_density = actual_nnz / (size * size) if size > 0 else 0
    
    matrix_info = f"Matrix: {size}x{size}, Target Density: {density:.6f}, Actual Density: {actual_density:.6f}, NNZ: {actual_nnz}"
    print(matrix_info) # For debugging in Hugging Face console
    
    # --- FlexChunk Run ---
    progress(0.2, desc="Preparing FlexChunks...")
    prepare_start = time.time()
    prepare_chunks(matrix, num_chunks, str(storage_dir), verbose=False)
    prepare_time = time.time() - prepare_start

    progress(0.4, desc="Loading FlexChunks...")
    load_start = time.time()
    chunks = load_chunks(str(storage_dir), verbose=False)
    load_time = time.time() - load_start
    
    progress(0.6, desc="Running FlexChunk SpMV...")
    flex_compute_start = time.time()
    flex_result = matrix_vector_multiply(chunks, vector, verbose=False)
    flex_compute_time = time.time() - flex_compute_start
    flex_total_time = load_time + flex_compute_time
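    # Note: chunk preparation is a one-time preprocessing cost, so the headline
    # total above only counts load + compute (prepare time is reported separately)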
    
    # Estimate FlexChunk memory usage
    max_chunk_size = max(chunk.data.nbytes + chunk.col_indices.nbytes + chunk.row_offsets.nbytes for chunk in chunks)
    flex_operational_memory = max_chunk_size + vector.nbytes + (size * 8)  # Chunk + vector + result vector
    flex_memory_mb = flex_operational_memory / (1024*1024)
    
    # --- SciPy Run (Optional) ---
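    # The SciPy baseline mirrors the out-of-core setting: the matrix and vector are
    # first written to disk, then load + compute are timed, matching the FlexChunk path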
    if not flex_only:
        progress(0.7, desc="Saving SciPy data...")
        scipy_temp_dir = storage_dir / "scipy_temp"
        scipy_temp_dir.mkdir(exist_ok=True)
        matrix_file = scipy_temp_dir / "matrix.npz"
        vector_file = scipy_temp_dir / "vector.npy"
        
        scipy_save_start = time.time()
        sparse.save_npz(matrix_file, matrix)
        np.save(vector_file, vector)
        scipy_save_time = time.time() - scipy_save_start
        
        progress(0.8, desc="Loading SciPy data...")
        scipy_load_start = time.time()
        loaded_matrix = sparse.load_npz(matrix_file)
        loaded_vector = np.load(vector_file)
        scipy_load_time = time.time() - scipy_load_start
        
        progress(0.9, desc="Running SciPy SpMV...")
        scipy_compute_start = time.time()
        scipy_result = loaded_matrix @ loaded_vector
        scipy_compute_time = time.time() - scipy_compute_start
        scipy_total_time = scipy_load_time + scipy_compute_time
        
        # Estimate SciPy memory usage
        scipy_memory = loaded_matrix.data.nbytes + loaded_matrix.indices.nbytes + loaded_matrix.indptr.nbytes + loaded_vector.nbytes
        scipy_memory_mb = scipy_memory / (1024*1024)
        
        # --- Comparison ---
        progress(0.95, desc="Comparing results...")
        diff = np.abs(scipy_result - flex_result)
        max_diff = np.max(diff) if len(diff) > 0 else 0
        mean_diff = np.mean(diff) if len(diff) > 0 else 0
        is_close = np.allclose(scipy_result, flex_result, atol=1e-9)  # Tighter absolute tolerance than numpy's default (1e-8)
        comparison_result = f"✅ Results Match! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})" if is_close else f"❌ Results Differ! (Max Diff: {max_diff:.2e}, Mean Diff: {mean_diff:.2e})"
    
    # --- Cleanup ---
    shutil.rmtree(storage_dir)
    
    progress(1.0, desc="Benchmark Complete")
    
    # --- Format Output ---
    if flex_only:
        results_summary = f"""
## Matrix Information
{matrix_info}

## FlexChunk Performance
| Stage | Time |
|-------|------|
| Prepare Chunks | {prepare_time:.4f}s |
| Load Chunks | {load_time:.4f}s |
| Compute | {flex_compute_time:.4f}s |
| **Total (Load+Compute)** | **{flex_total_time:.4f}s** |

## Memory Usage
| Metric | Value |
|--------|-------|
| Peak RAM Usage | {flex_memory_mb:.2f} MB |
| Chunks | {num_chunks} |
"""
    else:
        results_summary = f"""
## Matrix Information
{matrix_info}

## Performance Comparison

| Stage | FlexChunk | SciPy (Out-of-Core) |
|-------|-----------|---------------------|
| Data Preparation | {prepare_time:.4f}s | {scipy_save_time:.4f}s |
| Load Time | {load_time:.4f}s | {scipy_load_time:.4f}s |
| Compute Time | {flex_compute_time:.4f}s | {scipy_compute_time:.4f}s |
| **Total (Load+Compute)** | **{flex_total_time:.4f}s** | **{scipy_total_time:.4f}s** |

## Memory Usage
| Metric | FlexChunk | SciPy |
|--------|-----------|-------|
| Peak RAM Usage | {flex_memory_mb:.2f} MB | {scipy_memory_mb:.2f} MB |
| Memory Ratio | 1.0x | {scipy_memory_mb/flex_memory_mb:.2f}x |

## Comparison
{comparison_result}
"""
    
    return results_summary

# --- Gradio Interface ---

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # FlexChunk: Out-of-Core Sparse Matrix-Vector Multiplication
    
    This interactive demo showcases **FlexChunk**, an algorithm for performing Sparse Matrix-Vector Multiplication (SpMV) on matrices that may be too large to fit entirely in memory.
    
    **Key Benefits:**
    * Process matrices up to 100M×100M using only ~1.7GB RAM
    * Near-linear scaling in both time and memory usage
    * Outperforms traditional approaches for large out-of-core matrices
    """)
    
    with gr.Tabs() as tabs:
        # Standard mode tab
        with gr.TabItem("Standard Mode"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Matrix Parameters")
                    standard_size = gr.Slider(
                        label="Matrix Size (N×N)", 
                        minimum=1000, 
                        maximum=200000, 
                        value=10000, 
                        step=1000,
                        info="Square matrix dimension (N×N)"
                    )
                    standard_density = gr.Slider(
                        label="Matrix Density", 
                        minimum=0.00001, 
                        maximum=0.01, 
                        value=0.0001, 
                        step=0.00001,
                        info="Fraction of non-zero elements (0.0001 = 0.01%)"
                    )
                    standard_chunks = gr.Slider(
                        label="Number of Chunks", 
                        minimum=1, 
                        maximum=32, 
                        value=4, 
                        step=1,
                        info="More chunks = less memory but more overhead"
                    )
                    standard_challenging = gr.Checkbox(
                        label="Use Challenging Matrix",
                        info="Includes extreme values and special patterns"
                    )
                    standard_flexonly = gr.Checkbox(
                        label="FlexChunk Only",
                        info="Skip SciPy comparison for better performance"
                    )
                    standard_button = gr.Button("Run Benchmark", variant="primary")
            
            standard_output = gr.Markdown()
        
        # Advanced mode tab
        with gr.TabItem("Advanced Mode"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Large Matrix Parameters")
                    gr.Markdown("""
                    ⚠️ **Warning**: Processing time varies with matrix size:
                    - 1M×1M matrices: ~1 second
                    - 10M×10M matrices: ~10 seconds
                    - 100M×100M matrices: ~1 minute 47 seconds
                    
                    For large matrices, FlexChunk-only mode is automatically enabled.
                    """)
                    
                    advanced_size = gr.Slider(
                        label="Matrix Size (N×N)", 
                        minimum=50000, 
                        maximum=300000000, 
                        value=100000, 
                        step=50000,
                        info="Square matrix dimension - up to 300M×300M (extremely large values will take significant time)"
                    )
                    advanced_density = gr.Slider(
                        label="Matrix Density", 
                        minimum=0.0000001, 
                        maximum=0.001, 
                        value=0.000001, 
                        step=0.0000001,
                        info="Use lower density for very large matrices"
                    )
                    advanced_chunks = gr.Slider(
                        label="Number of Chunks", 
                        minimum=4, 
                        maximum=100, 
                        value=10, 
                        step=1,
                        info="More chunks recommended for larger matrices"
                    )
                    advanced_challenging = gr.Checkbox(
                        label="Use Challenging Matrix",
                        info="Includes extreme values and special patterns"
                    )
                    
                    # Force FlexChunk only for advanced mode
                    gr.Markdown("*SciPy comparison disabled for large matrices*")
                    advanced_button = gr.Button("Run Advanced Benchmark", variant="primary")
            
            advanced_output = gr.Markdown()
    
    # Event handlers
    standard_button.click(
        fn=run_benchmark,
        inputs=[standard_size, standard_density, standard_chunks, standard_challenging, standard_flexonly],
        outputs=standard_output
    )
    
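    # Advanced mode always forces flex_only=True (via the lambda below), so SciPy is
    # never asked to load the full matrix at very large sizes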
    advanced_button.click(
        fn=lambda size, density, chunks, challenging: run_benchmark(size, density, chunks, challenging, True),
        inputs=[advanced_size, advanced_density, advanced_chunks, advanced_challenging],
        outputs=advanced_output
    )
    
    gr.Markdown("""
    ---
    ### About FlexChunk
    
    FlexChunk enables processing matrices that would normally exceed RAM capacity by dividing them into manageable chunks.
    
    **Links:**
    - Read more in the [original article](https://www.lesswrong.com/posts/zpRhsdDkWygTDScxb/flexchunk-enabling-100m-100m-out-of-core-spmv-1-8-min-1-7-gb)
    - View source code on [GitHub](https://github.com/DanielSwift1992/FlexChunk)
    
    ---
    ### Benchmark Results
    
    Actual performance measurements from our tests:
    
    | Matrix Size     | Non-zero Elements | Total Time    | Peak RAM Usage |
    |-----------------|-------------------|---------------|----------------|
    | 1.0M × 1.0M     | 1.2M              | 1.07 s        | 17.00 MB       |
    | 10.0M × 10.0M   | 12.0M             | 10.21 s       | 170.00 MB      |
    | 50.0M × 50.0M   | 62.5M             | 55.27 s       | 850.00 MB      |
    | 100.0M × 100.0M | 120.0M            | 1 min 47.1 s  | 1.70 GB        |
    
    The algorithm scales nearly linearly and can theoretically handle even larger matrices (up to 300M×300M), with proportionally increased processing time and memory usage.
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()