Creating trainer and tester modules
- DDQN_FNN.py → DDQN.py +0 -0
- antiJamEnv.py +97 -0
- antiJamming_v1.py +0 -139
- tester.py +167 -0
- trainer.py +167 -0
DDQN_FNN.py → DDQN.py
RENAMED
File without changes
antiJamEnv.py
ADDED
@@ -0,0 +1,97 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env
import gym
from gym import spaces
import numpy as np


class AntiJamEnv(gym.Env):
    def __init__(self):
        super(AntiJamEnv, self).__init__()

        self.num_channels = 8
        self.channel_bandwidth = 20  # MHz
        self.frequency_range = [5180, 5320]  # MHz

        self.observation_space = spaces.Box(low=-30, high=40, shape=(self.num_channels,), dtype=np.float32)
        self.action_space = spaces.Discrete(self.num_channels)

        self.current_channel = np.random.randint(self.num_channels)
        self.jammer_modes = ['constant', 'random', 'sweeping']
        self.jammer_mode = np.random.choice(self.jammer_modes)
        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])

    def _get_received_power(self, channel_idx):
        # Simulate received jamming power using normal distribution
        jammed_power = np.random.normal(loc=30, scale=5)
        adjacent_power = np.random.normal(loc=13, scale=3)
        far_away_power = np.random.normal(loc=-7, scale=1)

        if channel_idx == self.current_channel:
            return jammed_power
        elif abs(channel_idx - self.current_channel) == 1:
            return adjacent_power
        elif abs(channel_idx - self.current_channel) >= 3:
            return far_away_power
        else:
            return -30  # Unjammed

    def step(self, action):
        assert self.action_space.contains(action), "Invalid action"

        received_power = self._get_received_power(action)
        if received_power >= 0:
            reward = 1.0
        else:
            reward = 0.0

        if self.current_channel != action:
            reward *= 0.9  # Channel switching cost

        self.current_channel = action

        if self.jammer_mode == 'random':
            self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
        elif self.jammer_mode == 'sweeping':
            self.jammer_frequency += self.channel_bandwidth
            if self.jammer_frequency > self.frequency_range[1]:
                self.jammer_frequency = self.frequency_range[0]

        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])

        return self.observation, reward, False, {}

    def reset(self):
        self.current_channel = np.random.randint(self.num_channels)
        self.jammer_mode = np.random.choice(self.jammer_modes)
        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])

        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
        return self.observation

    def render(self, mode='human'):
        pass

    def close(self):
        pass


# Test the environment
env = AntiJamEnv()
observation = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    observation, reward, done, _ = env.step(action)
    print("Action:", action, "Reward:", reward, "Observation:", observation)
    if done:
        break
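A note on usage (not part of the commit): AntiJamEnv.step() always returns done=False, so episodes never end on their own and a step limit has to be imposed from outside. Below is a minimal sketch of doing that with gym's TimeLimit wrapper under a random policy; the import path and the wrapper's availability in the installed gym version are assumptions.

from gym.wrappers import TimeLimit
from antiJamEnv import AntiJamEnv  # assumed import path; the module-level test loop in antiJamEnv.py also runs on import

wrapped = TimeLimit(AntiJamEnv(), max_episode_steps=100)  # force 100-step episodes

obs = wrapped.reset()
done, episode_return = False, 0.0
while not done:
    action = wrapped.action_space.sample()          # random policy, purely to exercise the API
    obs, reward, done, info = wrapped.step(action)
    episode_return += reward
print("Episode return under a random policy:", episode_return)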
antiJamming_v1.py
DELETED
@@ -1,139 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env

env = gym.make('ns3-v0')
ob_space = env.observation_space
ac_space = env.action_space
print("Observation space: ", ob_space, ob_space.dtype)
print("Action space: ", ac_space, ac_space.n)

s_size = ob_space.shape[0]
a_size = ac_space.n
jammerType = 'combined'

model = keras.Sequential()
model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(a_size, activation='softmax'))
model.compile(optimizer=tf.optimizers.Adam(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

total_episodes = 1
max_env_steps = 1
env._max_episode_steps = max_env_steps

epsilon = 1.0  # exploration rate
epsilon_min = 0.01
epsilon_decay = 0.99

time_history = []
rew_history = []

# Training agent
for e in range(total_episodes):

    state = env.reset()
    state = np.reshape(state, [1, s_size])
    rewardsum = 0
    for time in range(max_env_steps):
        # Choose action
        if np.random.rand(1) < epsilon:
            action = np.random.randint(a_size)
        else:
            action = np.argmax(model.predict(state)[0])

        # Step
        next_state, reward, done, _ = env.step(action)

        if done or time == max_env_steps - 1:
            print("episode: {}/{}, time: {}, rew: {}, eps: {:.2}"
                  .format(e, total_episodes, time, rewardsum, epsilon))
            break

        next_state = np.reshape(next_state, [1, s_size])

        # Train
        target = reward
        if not done:
            target = (reward + 0.95 * np.amax(model.predict(next_state)[0]))

        target_f = model.predict(state)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)

        state = next_state
        rewardsum += reward
        if epsilon > epsilon_min: epsilon *= epsilon_decay

    time_history.append(time)
    rew_history.append(rewardsum)
    # Implementing early break

# Plotting Learning Performance
print("Plot Learning Performance")
mpl.rcdefaults()
mpl.rcParams.update({'font.size': 16})

fig, ax = plt.subplots(figsize=(10, 4))
plt.grid(True, linestyle='--')
plt.title('Learning Performance')
plt.plot(range(len(time_history)), time_history, label='Steps', marker="^", linestyle=":")  # , color='red')
plt.plot(range(len(rew_history)), rew_history, label='Reward', marker="", linestyle="-")  # , color='k')
plt.xlabel('Episode')
plt.ylabel('Time')
plt.legend(prop={'size': 12})

plt.savefig('learning.pdf', bbox_inches='tight')
plt.show()

# for n in range(2 ** s_size):
#     state = [n >> i & 1 for i in range(0, 2)]
#     state = np.reshape(state, [1, s_size])
#     print("state " + str(state)
#           + " -> prediction " + str(model.predict(state)[0])
#           )

# Testing agent
n_runs = 1
total_trans_pkts = 0

for run in range(n_runs):
    state = env.reset()
    state = np.reshape(state, [1, s_size])
    total_trans_pkts_per_run = 0
    for time in range(max_env_steps):
        # Choose Channel
        action = np.argmax(model.predict(state)[0])
        # Step
        next_state, reward, done, _ = env.step(action)
        total_trans_pkts_per_run += reward
        if done or time == max_env_steps - 1:
            break
        next_state = np.reshape(next_state, [1, s_size])
        # Test
        state = next_state

    print(f"Run: {run}/{n_runs}, Total transferred packets: {total_trans_pkts_per_run}")
    total_trans_pkts += total_trans_pkts_per_run

# print(model.get_config())
# print(model.to_json())
# print(model.get_weights())

# Save Results for this time slots value
normalizedThroughput = total_trans_pkts / (100 * n_runs)
print(f'The normalized throughput is: {normalizedThroughput}')
filename = f'{jammerType}_timeSlots_{max_env_steps}.json'
with open(filename, 'w') as f:
    json.dump(normalizedThroughput, f)
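For reference, the deleted script bootstrapped its training target from the same network it was updating, i.e. target = reward + 0.95 * max_a Q(state', a). A double DQN, which the renamed DoubleDeepQNetwork module presumably provides, instead selects the next action with the online network and evaluates it with a separate target network. A minimal sketch of that target computation, using illustrative names not taken from DDQN.py:

import numpy as np

def double_dqn_target(online_model, target_model, reward, next_state, done, gamma=0.95):
    # next_state is assumed to already be reshaped to (1, s_size), as in the loops above.
    if done:
        return reward
    next_q_online = online_model.predict(next_state)[0]   # online net selects the action
    next_q_target = target_model.predict(next_state)[0]   # target net evaluates it
    best_action = np.argmax(next_q_online)
    return reward + gamma * next_q_target[best_action]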
tester.py
ADDED
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
from os import mkdir
import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost

for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps

    epsilon = 1.0  # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32

    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    rewards = []  # Store rewards for graphing
    epsilons = []  # Store the Explore/Exploit

    # Training agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: Three collisions occurred in the last 10 steps.
            # time == max_env_steps - 1 : No collisions occurred
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Applying channel switching cost
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
            state = next_state
            previous_action = action

            # Experience Replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)
        # Update the weights after each episode (You can configure this for x steps as well
        DDQN_agent.update_target_from_model()
        # If our current NN passes we are done
        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Set the rest of the episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break

    # Testing
    print('Training complete. Testing started...')
    # TEST Time
    # In this section we ALWAYS use exploit as we don't train anymore
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end
    # Testing Loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # We are doing full exploit
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # DON'T STORE ANYTHING DURING TESTING
            state = next_state
            previous_channel = action
            # done: More than 3 collisions occurred in the last 10 steps.
            # t_test == max_env_steps - 1: No collisions occurred
            total_transmissions += 1

    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
    eps_graph = [200 * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Plot the line where TESTING begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end+TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    # plt.show()

    # Save Results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)
    # Channel switching times
    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized channel switching times is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)
    # Save the agent as a SavedAgent.
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
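tester.py (and trainer.py below) touch the agent only through a small surface: the constructor, the epsilon attribute, the memory buffer, action() and test_action(), store(), experience_replay(), update_target_from_model(), and save_model(). The stub below sketches that interface, inferred solely from the calls above; the real implementation in the renamed DDQN.py will differ in its internals.

from collections import deque
import numpy as np

class DoubleDeepQNetworkStub:  # illustrative stand-in, not the shipped class
    def __init__(self, s_size, a_size, lr, gamma, epsilon, epsilon_min, epsilon_decay):
        self.epsilon = epsilon             # read by the training loop for logging
        self.memory = deque(maxlen=10000)  # len(memory) is compared against batch_size
        self.a_size = a_size

    def action(self, state):               # epsilon-greedy choice during training
        return np.random.randint(self.a_size)

    def test_action(self, state):          # greedy choice during testing
        return np.random.randint(self.a_size)

    def store(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self, batch_size):
        pass                               # sample from memory and fit the online network

    def update_target_from_model(self):
        pass                               # copy online-network weights into the target network

    def save_model(self, path):
        pass                               # persist the trained agent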
trainer.py
ADDED
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
from os import mkdir
import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost

for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps

    epsilon = 1.0  # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32

    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    rewards = []  # Store rewards for graphing
    epsilons = []  # Store the Explore/Exploit

    # Training agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: Three collisions occurred in the last 10 steps.
            # time == max_env_steps - 1 : No collisions occurred
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Applying channel switching cost
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
            state = next_state
            previous_action = action

            # Experience Replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)
        # Update the weights after each episode (You can configure this for x steps as well
        DDQN_agent.update_target_from_model()
        # If our current NN passes we are done
        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Set the rest of the episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break

    # Testing
    print('Training complete. Testing started...')
    # TEST Time
    # In this section we ALWAYS use exploit as we don't train anymore
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end
    # Testing Loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # We are doing full exploit
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # DON'T STORE ANYTHING DURING TESTING
            state = next_state
            previous_channel = action
            # done: More than 3 collisions occurred in the last 10 steps.
            # t_test == max_env_steps - 1: No collisions occurred
            total_transmissions += 1

    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
    eps_graph = [200 * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Plot the line where TESTING begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end+TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    # plt.show()

    # Save Results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)
    # Channel switching times
    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized channel switching times is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)
    # Save the agent as a SavedAgent.
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
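Two operational assumptions worth flagging, sketched below rather than folded into the scripts: the plots, JSON results, and saved agents are written under results/FNN/ and savedAgents/FNN/, which must already exist when plt.savefig and open() are called (both scripts import os and mkdir without using them); and both scripts still import DoubleDeepQNetwork from DDQN_FNN even though this commit renames that file to DDQN.py.

import os

network = 'FNN'
for d in (f'results/{network}', f'savedAgents/{network}'):
    os.makedirs(d, exist_ok=True)  # no-op if the directory already exists

try:
    from DDQN import DoubleDeepQNetwork      # new module name after the rename
except ImportError:
    from DDQN_FNN import DoubleDeepQNetwork  # fall back to the old name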