import gradio as gr
from train import TrainingLoop
from scipy.special import softmax
import numpy as np

# Global variables for training and data storage
train = None
frames, attributions = None, None

# Lunar Lander environment state feature mapping
LUNAR_LANDER_FEATURES = {
    0: "X-coordinate",
    1: "Y-coordinate", 
    2: "Linear velocity in the X-axis",
    3: "Linear velocity in the Y-axis",
    4: "Angle",
    5: "Angular velocity",
    6: "Left leg touched the floor",
    7: "Right leg touched the floor"
}

def create_training_loop(env_spec):
    """Initialize the training loop with the specified environment."""
    global train
    train = TrainingLoop(env_spec=env_spec)
    train.create_agent()
    return train.env.spec

def display_softmax(inputs):
    """Convert raw attribution values to softmax probabilities for visualization."""
    inputs = np.array(inputs)
    probabilities = softmax(inputs)
    
    softmax_dict = {
        name: float(prob) 
        for name, prob in zip(LUNAR_LANDER_FEATURES.values(), probabilities)
    }
    return softmax_dict

def generate_output(num_iterations, option):
    """Generate attribution explanations for the trained agent."""
    global frames, attributions
    frames, attributions = train.explain_trained(
        num_iterations=num_iterations,
        option=option
    )
    # Return a Slider update so the frontend range matches the number of collected
    # frames; mutating slider.maximum directly is not reflected in the UI.
    return gr.update(maximum=len(frames) - 1, value=0)

def get_frame_and_attribution(slider_value):
    """Get frame and attribution data for the selected timestep."""
    global frames, attributions
    slider_value = min(slider_value, len(frames) - 1)
    frame = frames[slider_value]
    
    print(f"Frame shape: {frame.shape}")
    
    attribution = display_softmax(attributions[slider_value])
    return frame, attribution
    
with gr.Blocks(
    title="Deep RL Explainability",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .tab-nav {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    }
    """
) as demo:
    
    # Header section
    gr.Markdown("""
    # 🚀 Deep Reinforcement Learning Explainability
    
    **Exploring AI decision-making through Integrated Gradients in RL environments**
    
    ---
    """)
    
    # Introduction section
    gr.Markdown("""
    ## 📖 How This Works
    
    This application demonstrates how **[Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients)**
    can be applied to Deep Reinforcement Learning. We use PyTorch's Captum library for interpretability
    and Gymnasium for the continuous Lunar Lander environment.
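
    The core attribution call looks roughly like the sketch below. This is a minimal illustration rather
    than this Space's exact code: the stand-in `actor` network and the zero baseline are assumptions.

    ```python
    import torch
    import torch.nn as nn
    from captum.attr import IntegratedGradients

    # Stand-in actor; in this Space the real network comes from the trained DDPG agent
    actor = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2), nn.Tanh())

    state = torch.rand(1, 8)            # one 8-feature Lunar Lander observation
    baseline = torch.zeros_like(state)  # the "Torch Tensor of 0's" baseline option

    ig = IntegratedGradients(actor)
    # Attribute the first action dimension (target=0) back to the 8 state features
    attributions = ig.attribute(state, baselines=baseline, target=0)
    print(attributions.shape)  # torch.Size([1, 8])
    ```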
    
    ### 🧠 Training Algorithm: [DDPG](https://arxiv.org/abs/1509.02971)
    
    The agent is trained using **Deep Deterministic Policy Gradients** and achieves an average reward 
    of **260.8** per episode (successful landings).
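
    The heart of DDPG is the deterministic policy gradient: a critic scores state-action pairs, and the
    actor is nudged toward actions the critic scores highly. A minimal sketch of the actor update
    (network shapes and names here are illustrative, not the ones used by this Space):

    ```python
    import torch
    import torch.nn as nn

    # Illustrative actor and critic for an 8-D state and 2-D continuous action
    actor = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2), nn.Tanh())
    critic = nn.Sequential(nn.Linear(8 + 2, 64), nn.ReLU(), nn.Linear(64, 1))

    state = torch.rand(32, 8)  # a batch of observations, e.g. from a replay buffer

    # The critic is regressed toward a TD target (omitted here); the actor simply
    # ascends the critic's estimate of Q(s, mu(s)).
    actor_loss = -critic(torch.cat([state, actor(state)], dim=1)).mean()
    actor_loss.backward()
    ```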
    
    ### 🎯 How to Use This Space
    
    1. **Select Environment**: Choose the Lunar Lander environment
    2. **Choose Baseline**: Select between zero tensor or running average baseline
    3. **Generate Attributions**: Click "GENERATE ATTRIBUTIONS" and wait ~20-25 seconds
    4. **Explore Results**: Use the slider to examine attributions at different timesteps
    
    The attributions are normalized using Softmax to provide interpretable probability distributions.
    """)
    
    # Main interface tab
    with gr.Tab("🔍 Attribution Analysis", elem_id="attribution-tab"):
        
        # Environment setup
        gr.Markdown("### ๐ŸŒ™ Environment Setup")
        env_spec = gr.Dropdown(
            choices=["LunarLander-v2"],
            type="value",
            multiselect=False,
            label="Environment Specification",
            value="LunarLander-v2",
            info="Select the RL environment to analyze"
        )
        
        env_interface = gr.Interface(
            title="Initialize Environment",
            allow_flagging="never",
            inputs=env_spec,
            fn=create_training_loop,
            outputs=gr.JSON(label="Environment Spec"),
            description="Click to initialize the training environment"
        )
        
        # Attribution controls
        gr.Markdown("### โš™๏ธ Attribution Configuration")
        
        with gr.Row():
            with gr.Column(scale=1):
                option = gr.Dropdown(
                    choices=["Torch Tensor of 0's", "Running Average"],
                    type="index",
                    label="Baseline Method",
                    info="Choose the baseline for Integrated Gradients"
                )
            
            with gr.Column(scale=1):
                baselines = gr.Slider(
                    label="Number of Baseline Iterations",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    value=10,
                    step=5,
                    info="Number of baseline inputs to collect for averaging"
                )
        
        # Generate button
        generate_btn = gr.Button(
            "🚀 GENERATE ATTRIBUTIONS",
            variant="primary",
            size="lg"
        )

        # Results section
        gr.Markdown("### 📊 Results Visualization")

        slider = gr.Slider(
            label="🎬 Key Frame Selector",
            minimum=0,
            maximum=1000,
            step=1,
            value=0,
            info="Navigate through different timesteps to see attributions"
        )

        # Bind the click after the slider exists so its range can be updated
        # once attributions have been generated
        generate_btn.click(
            fn=generate_output,
            inputs=[baselines, option],
            outputs=[slider]
        )
        
        results_interface = gr.Interface(
            fn=get_frame_and_attribution,
            inputs=slider,
            live=True,
            outputs=[
                gr.Image(label="🎮 Environment State", type="numpy"),
                gr.Label(label="📈 Feature Attributions", num_top_classes=8)
            ],
            title="Real-time Attribution Analysis"
        )
    
    gr.Markdown("""
    ---
    
    ## 🛠️ Local Usage & Installation
    
    ### Required Packages
    ```bash
    pip install torch gymnasium 'gymnasium[box2d]'
    ```
    
    ### Box2D Installation (macOS)
    ```bash
    brew install swig
    pip install box2d
    ```
    
    ## 🎯 Lunar Lander Environment Details
    
    ### Reward Structure
    - **Position**: Reward increases as the lander moves closer to the landing pad, decreases as it drifts away
    - **Velocity**: Reward increases as the lander slows down, decreases as it speeds up
    - **Angle**: Reward decreases the more the lander is tilted (horizontal is ideal)
    - **Landing**: +10 points for each leg touching the ground
    - **Fuel**: -0.03 points per frame the side engine fires, -0.3 per frame for the main engine
    - **Episode End**: -100 for crashing, +100 for landing safely
    
    **Success Threshold**: 200+ points per episode
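
    The same per-step rewards can be inspected directly with Gymnasium. A minimal random-agent rollout
    (illustrative only; this Space uses its trained DDPG agent instead):

    ```python
    import gymnasium as gym

    env = gym.make("LunarLander-v2", continuous=True)
    obs, info = env.reset(seed=0)

    total_reward, done = 0.0, False
    while not done:
        action = env.action_space.sample()  # random policy, for illustration
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward              # accumulates the shaping terms listed above
        done = terminated or truncated

    print(total_reward)  # 200+ counts as a successful episode
    env.close()
    ```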
    
    ### Training Functions
    - `load_trained()`: Loads pre-trained model (1000 episodes)
    - `train()`: Trains from scratch
    - Set `render_mode=False` for faster training (see the combined usage sketch below)
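
    A hedged sketch of how these calls might be combined locally, using the same names as this Space's
    `app.py` (the exact signatures in `train.py` may differ):

    ```python
    from train import TrainingLoop

    loop = TrainingLoop(env_spec="LunarLander-v2")
    loop.create_agent()
    loop.load_trained()   # or loop.train() to train from scratch

    # Roll out the agent and compute Integrated Gradients attributions;
    # option=0 corresponds to the zero-tensor baseline in the UI dropdown
    frames, attributions = loop.explain_trained(num_iterations=10, option=0)
    ```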
    
    ---
    
    *Built with ❤️ using Gradio, PyTorch, and Captum*
    """)

if __name__ == "__main__":
    demo.launch()