import gradio as gr
from train import TrainingLoop
from scipy.special import softmax
import numpy as np

# Global variables for training and data storage
train = None
frames, attributions = None, None

# Lunar Lander environment state feature mapping
LUNAR_LANDER_FEATURES = {
    0: "X-coordinate",
    1: "Y-coordinate", 
    2: "Linear velocity in the X-axis",
    3: "Linear velocity in the Y-axis",
    4: "Angle",
    5: "Angular velocity",
    6: "Left leg touched the floor",
    7: "Right leg touched the floor"
}

def create_training_loop(env_spec):
    """Initialize the training loop with the specified environment."""
    global train
    train = TrainingLoop(env_spec=env_spec)
    train.create_agent()
    return train.env.spec

def display_softmax(inputs):
    """Convert raw attribution values to softmax probabilities for visualization."""
    inputs = np.array(inputs)
    probabilities = softmax(inputs)
    
    softmax_dict = {
        name: float(prob) 
        for name, prob in zip(LUNAR_LANDER_FEATURES.values(), probabilities)
    }
    return softmax_dict

def generate_output(num_iterations, option):
    """Generate attribution explanations for the trained agent."""
    global frames, attributions
    frames, attributions = train.explain_trained(
        num_iterations=num_iterations,
        option=option
    )
    # Return a Slider update so the frontend range matches the number of collected
    # frames; mutating slider.maximum directly is not reflected in the UI.
    return gr.update(maximum=len(frames) - 1, value=0)

def get_frame_and_attribution(slider_value):
    """Get frame and attribution data for the selected timestep."""
    global frames, attributions
    slider_value = min(slider_value, len(frames) - 1)
    frame = frames[slider_value]
    
    print(f"Frame shape: {frame.shape}")
    
    attribution = display_softmax(attributions[slider_value])
    return frame, attribution
    
with gr.Blocks(
    title="Deep RL Explainability",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .tab-nav {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    }
    """
) as demo:
    
    # Header section
    gr.Markdown("""
    # 🚀 Deep Reinforcement Learning Explainability
    
    **Exploring AI decision-making through Integrated Gradients in RL environments**
    
    ---
    """)
    
    # Introduction section
    gr.Markdown("""
    ## 📖 How This Works
    
    This application demonstrates how **[Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients)**
    can be applied to Deep Reinforcement Learning. We use PyTorch's Captum library for interpretability
    and Gymnasium for the continuous Lunar Lander environment.
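
    The core attribution call looks roughly like the sketch below. This is a minimal illustration rather
    than this Space's exact code: the stand-in `actor` network and the zero baseline are assumptions.

    ```python
    import torch
    import torch.nn as nn
    from captum.attr import IntegratedGradients

    # Stand-in actor; in this Space the real network comes from the trained DDPG agent
    actor = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2), nn.Tanh())

    state = torch.rand(1, 8)            # one 8-feature Lunar Lander observation
    baseline = torch.zeros_like(state)  # the "Torch Tensor of 0's" baseline option

    ig = IntegratedGradients(actor)
    # Attribute the first action dimension (target=0) back to the 8 state features
    attributions = ig.attribute(state, baselines=baseline, target=0)
    print(attributions.shape)  # torch.Size([1, 8])
    ```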
    
    ### 🧠 Training Algorithm: [DDPG](https://arxiv.org/abs/1509.02971)
    
    The agent is trained using **Deep Deterministic Policy Gradients** and achieves an average reward 
    of **260.8** per episode (successful landings).
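
    The heart of DDPG is the deterministic policy gradient: a critic scores state-action pairs, and the
    actor is nudged toward actions the critic scores highly. A minimal sketch of the actor update
    (network shapes and names here are illustrative, not the ones used by this Space):

    ```python
    import torch
    import torch.nn as nn

    # Illustrative actor and critic for an 8-D state and 2-D continuous action
    actor = nn.Sequential(nn.Linear(8, 64), nn.ReLU(), nn.Linear(64, 2), nn.Tanh())
    critic = nn.Sequential(nn.Linear(8 + 2, 64), nn.ReLU(), nn.Linear(64, 1))

    state = torch.rand(32, 8)  # a batch of observations, e.g. from a replay buffer

    # The critic is regressed toward a TD target (omitted here); the actor simply
    # ascends the critic's estimate of Q(s, mu(s)).
    actor_loss = -critic(torch.cat([state, actor(state)], dim=1)).mean()
    actor_loss.backward()
    ```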
    
    ### 🎯 How to Use This Space
    
    1. **Select Environment**: Choose the Lunar Lander environment
    2. **Choose Baseline**: Select between zero tensor or running average baseline
    3. **Generate Attributions**: Click "GENERATE ATTRIBUTIONS" and wait ~20-25 seconds
    4. **Explore Results**: Use the slider to examine attributions at different timesteps
    
    The attributions are normalized using Softmax to provide interpretable probability distributions.
    """)
    
    # Main interface tab
    with gr.Tab("🔍 Attribution Analysis", elem_id="attribution-tab"):
        
        # Environment setup
        gr.Markdown("### ๐ŸŒ™ Environment Setup")
        env_spec = gr.Dropdown(
            choices=["LunarLander-v2"],
            type="value",
            multiselect=False,
            label="Environment Specification",
            value="LunarLander-v2",
            info="Select the RL environment to analyze"
        )
        
        env_interface = gr.Interface(
            title="Initialize Environment",
            allow_flagging="never",
            inputs=env_spec,
            fn=create_training_loop,
            outputs=gr.JSON(label="Environment Spec"),
            description="Click to initialize the training environment"
        )
        
        # Attribution controls
        gr.Markdown("### โš™๏ธ Attribution Configuration")
        
        with gr.Row():
            with gr.Column(scale=1):
                option = gr.Dropdown(
                    choices=["Torch Tensor of 0's", "Running Average"],
                    type="index",
                    label="Baseline Method",
                    info="Choose the baseline for Integrated Gradients"
                )
            
            with gr.Column(scale=1):
                baselines = gr.Slider(
                    label="Number of Baseline Iterations",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    value=10,
                    step=5,
                    info="Number of baseline inputs to collect for averaging"
                )
        
        # Generate button
        generate_btn = gr.Button(
            "🚀 GENERATE ATTRIBUTIONS",
            variant="primary",
            size="lg"
        )

        # Results section
        gr.Markdown("### 📊 Results Visualization")

        slider = gr.Slider(
            label="🎬 Key Frame Selector",
            minimum=0,
            maximum=1000,
            step=1,
            value=0,
            info="Navigate through different timesteps to see attributions"
        )

        # Bind the click after the slider exists so its range can be updated
        # once attributions have been generated
        generate_btn.click(
            fn=generate_output,
            inputs=[baselines, option],
            outputs=[slider]
        )
        
        results_interface = gr.Interface(
            fn=get_frame_and_attribution,
            inputs=slider,
            live=True,
            outputs=[
                gr.Image(label="🎮 Environment State", type="numpy"),
                gr.Label(label="📈 Feature Attributions", num_top_classes=8)
            ],
            title="Real-time Attribution Analysis"
        )
    
    gr.Markdown("""
    ---
    
    ## 🛠️ Local Usage & Installation
    
    ### Required Packages
    ```bash
    pip install torch gymnasium 'gymnasium[box2d]'
    ```
    
    ### Box2D Installation (macOS)
    ```bash
    brew install swig
    pip install box2d
    ```
    
    ## 🎯 Lunar Lander Environment Details
    
    ### Reward Structure
    - **Position**: Reward increases as the lander moves closer to the landing pad, decreases as it drifts away
    - **Velocity**: Reward increases as the lander slows down, decreases as it speeds up
    - **Angle**: Reward decreases the more the lander is tilted (horizontal is ideal)
    - **Landing**: +10 points for each leg touching the ground
    - **Fuel**: -0.03 points per frame the side engine fires, -0.3 per frame for the main engine
    - **Episode End**: -100 for crashing, +100 for landing safely
    
    **Success Threshold**: 200+ points per episode
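
    The same per-step rewards can be inspected directly with Gymnasium. A minimal random-agent rollout
    (illustrative only; this Space uses its trained DDPG agent instead):

    ```python
    import gymnasium as gym

    env = gym.make("LunarLander-v2", continuous=True)
    obs, info = env.reset(seed=0)

    total_reward, done = 0.0, False
    while not done:
        action = env.action_space.sample()  # random policy, for illustration
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward              # accumulates the shaping terms listed above
        done = terminated or truncated

    print(total_reward)  # 200+ counts as a successful episode
    env.close()
    ```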
    
    ### Training Functions
    - `load_trained()`: Loads pre-trained model (1000 episodes)
    - `train()`: Trains from scratch
    - Set `render_mode=False` for faster training (see the combined usage sketch below)
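
    A hedged sketch of how these calls might be combined locally, using the same names as this Space's
    `app.py` (the exact signatures in `train.py` may differ):

    ```python
    from train import TrainingLoop

    loop = TrainingLoop(env_spec="LunarLander-v2")
    loop.create_agent()
    loop.load_trained()   # or loop.train() to train from scratch

    # Roll out the agent and compute Integrated Gradients attributions;
    # option=0 corresponds to the zero-tensor baseline in the UI dropdown
    frames, attributions = loop.explain_trained(num_iterations=10, option=0)
    ```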
    
    ---
    
    *Built with ❤️ using Gradio, PyTorch, and Captum*
    """)

if __name__ == "__main__":
    demo.launch()