"""Gradio demo: explaining a DDPG Lunar Lander agent with Integrated Gradients."""
import gradio as gr | |
from train import TrainingLoop | |
from scipy.special import softmax | |
import numpy as np | |
# Module-level state shared by the Gradio callbacks below: the active
# training loop and the most recently generated rollout data.
train = None
frames, attributions = None, None

# Human-readable labels for the 8 entries of Gymnasium's LunarLander
# observation vector, keyed by their position in the state array.
LUNAR_LANDER_FEATURES = dict(enumerate([
    "X-coordinate",
    "Y-coordinate",
    "Linear velocity in the X-axis",
    "Linear velocity in the Y-axis",
    "Angle",
    "Angular velocity",
    "Left leg touched the floor",
    "Right leg touched the floor",
]))
def create_training_loop(env_spec):
    """Build the global TrainingLoop for *env_spec* and return its env spec.

    Side effect: rebinds the module-level ``train`` global so the other
    Gradio callbacks can reach the active loop.
    """
    global train
    loop = TrainingLoop(env_spec=env_spec)
    train = loop
    loop.create_agent()
    return loop.env.spec
def display_softmax(inputs, feature_names=None):
    """Map raw attribution scores to a softmax probability per feature.

    Args:
        inputs: Sequence of raw attribution values, one per feature.
        feature_names: Optional iterable of labels paired positionally with
            ``inputs``. Defaults to the Lunar Lander feature names, keeping
            the original call signature backward-compatible.

    Returns:
        dict[str, float]: feature name -> softmax probability (sums to 1).
    """
    if feature_names is None:
        feature_names = LUNAR_LANDER_FEATURES.values()
    probabilities = softmax(np.asarray(inputs, dtype=float))
    # float() unwraps numpy scalars so the dict is JSON-serializable for Gradio.
    return {
        name: float(prob)
        for name, prob in zip(feature_names, probabilities)
    }
def generate_output(num_iterations, option):
    """Run attribution on the trained agent and cache frames/attributions.

    Args:
        num_iterations: Number of baseline inputs to collect for averaging.
        option: Integer index of the baseline method (0 = zero tensor,
            1 = running average) — the dropdown uses ``type="index"``.

    Raises:
        ValueError: If the environment has not been initialized yet.
    """
    global frames, attributions
    if train is None:
        # Fail with a clear message instead of AttributeError on None.
        raise ValueError(
            "Environment not initialized; run create_training_loop first."
        )
    frames, attributions = train.explain_trained(
        num_iterations=num_iterations,
        option=option
    )
    # Valid slider indices are 0..len(frames)-1; the original set
    # len(frames), an off-by-one past the last frame.
    # NOTE(review): mutating a Gradio component attribute after render may
    # not refresh the UI; returning gr.update(maximum=...) through the
    # click's outputs is the supported mechanism — confirm against the
    # Gradio version in use.
    slider.maximum = max(len(frames) - 1, 0)
def get_frame_and_attribution(slider_value):
    """Return the frame and attribution dict for the selected timestep.

    Args:
        slider_value: Requested timestep; coerced to int (Gradio sliders can
            deliver floats) and clamped into the valid index range.

    Returns:
        Tuple of (frame for gr.Image, {feature name: probability} for gr.Label).

    Raises:
        ValueError: If no attribution data has been generated yet.
    """
    global frames, attributions
    if frames is None or len(frames) == 0:
        # Original indexed frames[-1] (or crashed on None) in this case.
        raise ValueError("No frames available; generate attributions first.")
    # Clamp into [0, len(frames) - 1]; the original allowed -1 on empty input
    # and raised TypeError on float slider values.
    index = min(max(int(slider_value), 0), len(frames) - 1)
    # Debug print of the frame shape removed.
    return frames[index], display_softmax(attributions[index])
# ---------------------------------------------------------------------------
# Gradio UI definition: wires the helper functions above into a Blocks demo.
# NOTE(review): emoji in the strings below appear mojibake-garbled ("๐ ...");
# verify the file's encoding before editing any of these literals.
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="Deep RL Explainability",
    theme=gr.themes.Soft(),
    # Custom CSS: widen the page container and style the tab strip.
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .tab-nav {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    }
    """
) as demo:
    # Header section
    gr.Markdown("""
    # ๐ Deep Reinforcement Learning Explainability
    **Exploring AI decision-making through Integrated Gradients in RL environments**
    ---
    """)
    # Introduction / usage instructions
    gr.Markdown("""
    ## ๐ How This Works
    This application demonstrates the application of **[Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients)**
    to Deep Reinforcement Learning scenarios. We use PyTorch's Captum library for interpretability
    and Gymnasium for the continuous Lunar Lander environment.
    ### ๐ง Training Algorithm: [DDPG](https://arxiv.org/abs/1509.02971)
    The agent is trained using **Deep Deterministic Policy Gradients** and achieves an average reward
    of **260.8** per episode (successful landings).
    ### ๐ฏ How to Use This Space
    1. **Select Environment**: Choose the Lunar Lander environment
    2. **Choose Baseline**: Select between zero tensor or running average baseline
    3. **Generate Attributions**: Click "ATTRIBUTE" and wait ~20-25 seconds
    4. **Explore Results**: Use the slider to examine attributions at different timesteps
    The attributions are normalized using Softmax to provide interpretable probability distributions.
    """)
    # Main interface tab
    with gr.Tab("๐ Attribution Analysis", elem_id="attribution-tab"):
        # Environment setup controls
        gr.Markdown("### ๐ Environment Setup")
        env_spec = gr.Dropdown(
            choices=["LunarLander-v2"],
            type="value",
            multiselect=False,
            label="Environment Specification",
            value="LunarLander-v2",
            info="Select the RL environment to analyze"
        )
        # Sub-interface whose submit button calls create_training_loop.
        # NOTE(review): the bound name is never referenced again; Gradio
        # registers the interface as a side effect of construction.
        env_interface = gr.Interface(
            title="Initialize Environment",
            allow_flagging="never",
            inputs=env_spec,
            fn=create_training_loop,
            outputs=gr.JSON(label="Environment Spec"),
            description="Click to initialize the training environment"
        )
        # Attribution controls
        gr.Markdown("### โ๏ธ Attribution Configuration")
        with gr.Row():
            with gr.Column(scale=1):
                # type="index" -> generate_output receives the selected
                # option's integer position, not its label string.
                option = gr.Dropdown(
                    choices=["Torch Tensor of 0's", "Running Average"],
                    type="index",
                    label="Baseline Method",
                    info="Choose the baseline for Integrated Gradients"
                )
            with gr.Column(scale=1):
                baselines = gr.Slider(
                    label="Number of Baseline Iterations",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    value=10,
                    step=5,
                    info="Number of baseline inputs to collect for averaging"
                )
        # Button that triggers the (slow, ~20-25s) attribution computation.
        generate_btn = gr.Button(
            "๐ GENERATE ATTRIBUTIONS",
            variant="primary",
            size="lg"
        )
        # NOTE(review): generate_output works through module globals and
        # mutates `slider` in place; with outputs=[] Gradio itself updates
        # no component — confirm the slider range actually refreshes.
        generate_btn.click(
            fn=generate_output,
            inputs=[baselines, option],
            outputs=[]
        )
        # Results section
        gr.Markdown("### ๐ Results Visualization")
        slider = gr.Slider(
            label="๐ฌ Key Frame Selector",
            minimum=0,
            maximum=1000,
            step=1,
            value=0,
            info="Navigate through different timesteps to see attributions"
        )
        # live=True: re-renders the frame and attributions on every slider
        # move without a submit click. Bound name is otherwise unused.
        results_interface = gr.Interface(
            fn=get_frame_and_attribution,
            inputs=slider,
            live=True,
            outputs=[
                gr.Image(label="๐ฎ Environment State", type="numpy"),
                gr.Label(label="๐ Feature Attributions", num_top_classes=8)
            ],
            title="Real-time Attribution Analysis"
        )
    # Footer: local installation notes and environment documentation.
    gr.Markdown("""
    ---
    ## ๐ ๏ธ Local Usage & Installation
    ### Required Packages
    ```bash
    pip install torch gymnasium 'gymnasium[box2d]'
    ```
    ### Box2D Installation (macOS)
    ```bash
    brew install swig
    pip install box2d
    ```
    ## ๐ฏ Lunar Lander Environment Details
    ### Reward Structure
    - **Position**: Increased/decreased based on distance to landing pad
    - **Velocity**: Increased/decreased based on speed (slower is better)
    - **Angle**: Decreased when lander is tilted (horizontal is ideal)
    - **Landing**: +10 points for each leg touching ground
    - **Fuel**: -0.03 points per frame for side engine, -0.3 for main engine
    - **Episode End**: -100 for crash, +100 for safe landing
    **Success Threshold**: 200+ points per episode
    ### Training Functions
    - `load_trained()`: Loads pre-trained model (1000 episodes)
    - `train()`: Trains from scratch
    - Set `render_mode=False` for faster training
    ---
    *Built with โค๏ธ using Gradio, PyTorch, and Captum*
    """)

# Script entry point: launch the Gradio server.
if __name__ == "__main__":
    demo.launch()