asataura committed on
Commit d67dca9 · 1 Parent(s): 7439a65

Creating trainer and tester modules

Files changed (5)
  1. DDQN_FNN.py → DDQN.py +0 -0
  2. antiJamEnv.py +97 -0
  3. antiJamming_v1.py +0 -139
  4. tester.py +167 -0
  5. trainer.py +167 -0
DDQN_FNN.py → DDQN.py RENAMED
File without changes
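
DDQN.py is carried over unchanged, so its contents do not appear in this diff. From the calls made in trainer.py and tester.py below, the agent class is expected to expose a constructor taking (state size, action size, learning rate, discount rate, epsilon, epsilon_min, epsilon_decay), an epsilon attribute, a memory buffer with a length, and the methods action, test_action, store, experience_replay, update_target_from_model, and save_model. The following is a minimal sketch of that assumed interface; the layer sizes, buffer length, loss, and replay details are illustrative assumptions, not the committed DDQN.py implementation.

    import random
    from collections import deque

    import numpy as np
    from tensorflow import keras


    class DoubleDeepQNetwork:
        def __init__(self, s_size, a_size, lr, gamma, epsilon, epsilon_min, epsilon_decay):
            self.nS, self.nA = s_size, a_size
            self.gamma = gamma
            self.epsilon, self.epsilon_min, self.epsilon_decay = epsilon, epsilon_min, epsilon_decay
            self.memory = deque(maxlen=5000)           # replay buffer; trainer checks len(agent.memory)
            self.model = self._build_model(lr)         # online network
            self.model_target = self._build_model(lr)  # target network

        def _build_model(self, lr):
            # Hypothetical FNN; the real layer sizes live in the renamed DDQN.py
            model = keras.Sequential([
                keras.layers.Dense(24, input_dim=self.nS, activation='relu'),
                keras.layers.Dense(24, activation='relu'),
                keras.layers.Dense(self.nA, activation='linear'),
            ])
            model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(learning_rate=lr))
            return model

        def action(self, state):
            # Epsilon-greedy choice used during training
            if np.random.rand() <= self.epsilon:
                return np.random.randint(self.nA)
            return int(np.argmax(self.model.predict(state)[0]))

        def test_action(self, state):
            # Pure exploitation used during testing
            return int(np.argmax(self.model.predict(state)[0]))

        def store(self, state, action, reward, next_state, done):
            self.memory.append((state, action, reward, next_state, done))

        def update_target_from_model(self):
            self.model_target.set_weights(self.model.get_weights())

        def experience_replay(self, batch_size):
            # Double DQN update: the online net selects the next action, the target net evaluates it
            minibatch = random.sample(self.memory, batch_size)
            states = np.vstack([m[0] for m in minibatch])
            next_states = np.vstack([m[3] for m in minibatch])
            q = self.model.predict(states)
            q_next_online = self.model.predict(next_states)
            q_next_target = self.model_target.predict(next_states)
            for i, (_, a, r, _, d) in enumerate(minibatch):
                q[i][a] = r if d else r + self.gamma * q_next_target[i][np.argmax(q_next_online[i])]
            self.model.fit(states, q, epochs=1, verbose=0)
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay

        def save_model(self, path):
            self.model.save(path)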
antiJamEnv.py ADDED
@@ -0,0 +1,97 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ import gym
+ import tensorflow as tf
+ import tf_slim as slim
+ import numpy as np
+ import matplotlib as mpl
+ import matplotlib.pyplot as plt
+ import json
+ from tensorflow import keras
+ from ns3gym import ns3env
+ import gym
+ from gym import spaces
+ import numpy as np
+
+
+ class AntiJamEnv(gym.Env):
+     def __init__(self):
+         super(AntiJamEnv, self).__init__()
+
+         self.num_channels = 8
+         self.channel_bandwidth = 20  # MHz
+         self.frequency_range = [5180, 5320]  # MHz
+
+         self.observation_space = spaces.Box(low=-30, high=40, shape=(self.num_channels,), dtype=np.float32)
+         self.action_space = spaces.Discrete(self.num_channels)
+
+         self.current_channel = np.random.randint(self.num_channels)
+         self.jammer_modes = ['constant', 'random', 'sweeping']
+         self.jammer_mode = np.random.choice(self.jammer_modes)
+         self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+
+     def _get_received_power(self, channel_idx):
+         # Simulate received jamming power using normal distribution
+         jammed_power = np.random.normal(loc=30, scale=5)
+         adjacent_power = np.random.normal(loc=13, scale=3)
+         far_away_power = np.random.normal(loc=-7, scale=1)
+
+         if channel_idx == self.current_channel:
+             return jammed_power
+         elif abs(channel_idx - self.current_channel) == 1:
+             return adjacent_power
+         elif abs(channel_idx - self.current_channel) >= 3:
+             return far_away_power
+         else:
+             return -30  # Unjammed
+
+     def step(self, action):
+         assert self.action_space.contains(action), "Invalid action"
+
+         received_power = self._get_received_power(action)
+         if received_power >= 0:
+             reward = 1.0
+         else:
+             reward = 0.0
+
+         if self.current_channel != action:
+             reward *= 0.9  # Channel switching cost
+
+         self.current_channel = action
+
+         if self.jammer_mode == 'random':
+             self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+         elif self.jammer_mode == 'sweeping':
+             self.jammer_frequency += self.channel_bandwidth
+             if self.jammer_frequency > self.frequency_range[1]:
+                 self.jammer_frequency = self.frequency_range[0]
+
+         self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
+
+         return self.observation, reward, False, {}
+
+     def reset(self):
+         self.current_channel = np.random.randint(self.num_channels)
+         self.jammer_mode = np.random.choice(self.jammer_modes)
+         self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+
+         self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
+         return self.observation
+
+     def render(self, mode='human'):
+         pass
+
+     def close(self):
+         pass
+
+
+ # Test the environment
+ env = AntiJamEnv()
+ observation = env.reset()
+ for _ in range(10):
+     action = env.action_space.sample()
+     observation, reward, done, _ = env.step(action)
+     print("Action:", action, "Reward:", reward, "Observation:", observation)
+     if done:
+         break
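
Note that the smoke test at the bottom of antiJamEnv.py runs at import time, so any script that does `import antiJamEnv` will execute the 10-step random rollout as a side effect. A minimal guard (a suggestion, not part of the commit) keeps the test available while making the module safe to import:

    if __name__ == '__main__':
        # Run the smoke test only when the module is executed directly
        env = AntiJamEnv()
        observation = env.reset()
        for _ in range(10):
            action = env.action_space.sample()
            observation, reward, done, _ = env.step(action)
            print("Action:", action, "Reward:", reward, "Observation:", observation)
            if done:
                break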
antiJamming_v1.py DELETED
@@ -1,139 +0,0 @@
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
-
- import gym
- import tensorflow as tf
- import tf_slim as slim
- import numpy as np
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- import json
- from tensorflow import keras
- from ns3gym import ns3env
-
- env = gym.make('ns3-v0')
- ob_space = env.observation_space
- ac_space = env.action_space
- print("Observation space: ", ob_space, ob_space.dtype)
- print("Action space: ", ac_space, ac_space.n)
-
- s_size = ob_space.shape[0]
- a_size = ac_space.n
- jammerType = 'combined'
-
- model = keras.Sequential()
- model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='relu'))
- model.add(keras.layers.Dense(32, activation='relu'))
- model.add(keras.layers.Dense(a_size, activation='softmax'))
- model.compile(optimizer=tf.optimizers.Adam(0.001),
-               loss='categorical_crossentropy',
-               metrics=['accuracy'])
-
- total_episodes = 1
- max_env_steps = 1
- env._max_episode_steps = max_env_steps
-
- epsilon = 1.0  # exploration rate
- epsilon_min = 0.01
- epsilon_decay = 0.99
-
- time_history = []
- rew_history = []
-
- # Training agent
- for e in range(total_episodes):
-
-     state = env.reset()
-     state = np.reshape(state, [1, s_size])
-     rewardsum = 0
-     for time in range(max_env_steps):
-         # Choose action
-         if np.random.rand(1) < epsilon:
-             action = np.random.randint(a_size)
-         else:
-             action = np.argmax(model.predict(state)[0])
-
-         # Step
-         next_state, reward, done, _ = env.step(action)
-
-         if done or time == max_env_steps - 1:
-             print("episode: {}/{}, time: {}, rew: {}, eps: {:.2}"
-                   .format(e, total_episodes, time, rewardsum, epsilon))
-             break
-
-         next_state = np.reshape(next_state, [1, s_size])
-
-         # Train
-         target = reward
-         if not done:
-             target = (reward + 0.95 * np.amax(model.predict(next_state)[0]))
-
-         target_f = model.predict(state)
-         target_f[0][action] = target
-         model.fit(state, target_f, epochs=1, verbose=0)
-
-         state = next_state
-         rewardsum += reward
-         if epsilon > epsilon_min: epsilon *= epsilon_decay
-
-     time_history.append(time)
-     rew_history.append(rewardsum)
-     # Implementing early break
-
- # Plotting Learning Performance
- print("Plot Learning Performance")
- mpl.rcdefaults()
- mpl.rcParams.update({'font.size': 16})
-
- fig, ax = plt.subplots(figsize=(10, 4))
- plt.grid(True, linestyle='--')
- plt.title('Learning Performance')
- plt.plot(range(len(time_history)), time_history, label='Steps', marker="^", linestyle=":")  # , color='red')
- plt.plot(range(len(rew_history)), rew_history, label='Reward', marker="", linestyle="-")  # , color='k')
- plt.xlabel('Episode')
- plt.ylabel('Time')
- plt.legend(prop={'size': 12})
-
- plt.savefig('learning.pdf', bbox_inches='tight')
- plt.show()
-
- # for n in range(2 ** s_size):
- #     state = [n >> i & 1 for i in range(0, 2)]
- #     state = np.reshape(state, [1, s_size])
- #     print("state " + str(state)
- #           + " -> prediction " + str(model.predict(state)[0])
- #           )
-
- # Testing agent
- n_runs = 1
- total_trans_pkts = 0
-
- for run in range(n_runs):
-     state = env.reset()
-     state = np.reshape(state, [1, s_size])
-     total_trans_pkts_per_run = 0
-     for time in range(max_env_steps):
-         # Choose Channel
-         action = np.argmax(model.predict(state)[0])
-         # Step
-         next_state, reward, done, _ = env.step(action)
-         total_trans_pkts_per_run += reward
-         if done or time == max_env_steps - 1:
-             break
-         next_state = np.reshape(next_state, [1, s_size])
-         # Test
-         state = next_state
-
-     print(f"Run: {run}/{n_runs}, Total transferred packets: {total_trans_pkts_per_run}")
-     total_trans_pkts += total_trans_pkts_per_run
-
- # print(model.get_config())
- # print(model.to_json())
- # print(model.get_weights())
-
- # Save Results for this time slots value
- normalizedThroughput = total_trans_pkts / (100 * n_runs)
- print(f'The normalized throughput is: {normalizedThroughput}')
- filename = f'{jammerType}_timeSlots_{max_env_steps}.json'
- with open(filename, 'w') as f:
-     json.dump(normalizedThroughput, f)
tester.py ADDED
@@ -0,0 +1,167 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ import os
+ from os import mkdir
+ import gym
+ import tensorflow as tf
+ import tf_slim as slim
+ import numpy as np
+ import matplotlib as mpl
+ import matplotlib.pyplot as plt
+ import json
+ from tensorflow import keras
+ from ns3gym import ns3env
+ from DDQN import DoubleDeepQNetwork
+
+ jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
+ jammerType = jammerTypes[0]
+ network = 'FNN'
+ cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost
+
+ for csc in cscs:
+     env = gym.make('ns3-v0')
+     ob_space = env.observation_space
+     ac_space = env.action_space
+     print("Observation space: ", ob_space, ob_space.dtype)
+     print("Action space: ", ac_space, ac_space.n)
+
+     s_size = ob_space.shape[0]
+     a_size = ac_space.n
+     total_episodes = 200
+     max_env_steps = 100
+     train_end = 0
+     TRAIN_Episodes = 100
+     remaining_Episodes = 0
+     env._max_episode_steps = max_env_steps
+
+     epsilon = 1.0  # exploration rate
+     epsilon_min = 0.01
+     epsilon_decay = 0.999
+     discount_rate = 0.95
+     lr = 0.001
+     batch_size = 32
+
+     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
+     rewards = []  # Store rewards for graphing
+     epsilons = []  # Store the Explore/Exploit
+
+     # Training agent
+     for e in range(TRAIN_Episodes):
+         state = env.reset()
+         # print(f"Initial state is: {state}")
+         state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
+         tot_rewards = 0
+         previous_action = 0
+         for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
+             action = DDQN_agent.action(state)
+             next_state, reward, done, _ = env.step(action)
+             # print(f'The next state is: {next_state}')
+             # done: Three collisions occurred in the last 10 steps.
+             # time == max_env_steps - 1 : No collisions occurred
+             if done or time == max_env_steps - 1:
+                 rewards.append(tot_rewards)
+                 epsilons.append(DDQN_agent.epsilon)
+                 print("episode: {}/{}, score: {}, e: {}"
+                       .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
+                 break
+             # Applying channel switching cost
+             if action != previous_action:
+                 reward -= csc
+             next_state = np.reshape(next_state, [1, s_size])
+             tot_rewards += reward
+             DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
+             state = next_state
+             previous_action = action
+
+             # Experience Replay
+             if len(DDQN_agent.memory) > batch_size:
+                 DDQN_agent.experience_replay(batch_size)
+         # Update the weights after each episode (You can configure this for x steps as well)
+         DDQN_agent.update_target_from_model()
+         # If our current NN passes we are done
+         # Early stopping criteria: the average of the last 10 runs is within 10% of the max
+         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
+             # Set the rest of the episodes for testing
+             remaining_Episodes = total_episodes - e
+             train_end = e
+             break
+
+     # Testing
+     print('Training complete. Testing started...')
+     # TEST Time
+     # In this section we ALWAYS use exploit as we don't train anymore
+     total_transmissions = 0
+     successful_transmissions = 0
+     if remaining_Episodes == 0:
+         train_end = TRAIN_Episodes
+         TEST_Episodes = 100
+     else:
+         TEST_Episodes = total_episodes - train_end
+     # Testing Loop
+     n_channel_switches = 0
+     for e_test in range(TEST_Episodes):
+         state = env.reset()
+         state = np.reshape(state, [1, s_size])
+         tot_rewards = 0
+         previous_channel = 0
+         for t_test in range(max_env_steps):
+             action = DDQN_agent.test_action(state)
+             next_state, reward, done, _ = env.step(action)
+             if done or t_test == max_env_steps - 1:
+                 rewards.append(tot_rewards)
+                 epsilons.append(0)  # We are doing full exploit
+                 print("episode: {}/{}, score: {}, e: {}"
+                       .format(e_test, TEST_Episodes, tot_rewards, 0))
+                 break
+             next_state = np.reshape(next_state, [1, s_size])
+             tot_rewards += reward
+             if action != previous_channel:
+                 n_channel_switches += 1
+             if reward == 1:
+                 successful_transmissions += 1
+             # DON'T STORE ANYTHING DURING TESTING
+             state = next_state
+             previous_channel = action
+             # done: More than 3 collisions occurred in the last 10 steps.
+             # t_test == max_env_steps - 1: No collisions occurred
+             total_transmissions += 1
+
+     # Plotting
+     plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
+     rolling_average = np.convolve(rewards, np.ones(10) / 10)
+     plt.plot(rewards)
+     plt.plot(rolling_average, color='black')
+     plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
+     # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
+     eps_graph = [200 * x for x in epsilons]
+     plt.plot(eps_graph, color='g', linestyle='-')
+     # Plot the line where TESTING begins
+     plt.axvline(x=train_end, color='y', linestyle='-')
+     plt.xlim((0, train_end + TEST_Episodes))
+     plt.ylim((0, max_env_steps))
+     plt.xlabel('Episodes')
+     plt.ylabel('Rewards')
+     plt.savefig(plotName, bbox_inches='tight')
+     # plt.show()
+
+     # Save Results
+     # Rewards
+     fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
+     with open(fileName, 'w') as f:
+         json.dump(rewards, f)
+     # Normalized throughput
+     normalizedThroughput = successful_transmissions / (TEST_Episodes * (max_env_steps - 2))
+     print(f'The normalized throughput is: {normalizedThroughput}')
+     fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
+     with open(fileName, 'w') as f:
+         json.dump(normalizedThroughput, f)
+     # Channel switching times
+     normalized_cst = n_channel_switches / (TEST_Episodes * (max_env_steps - 2))
+     print(f'The normalized channel switching times is: {normalized_cst}')
+     fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
+     with open(fileName, 'w') as f:
+         json.dump(normalized_cst, f)
+     # Save the agent as a SavedAgent.
+     agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
+     DDQN_agent.save_model(agentName)
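
Two plotting details in this loop are worth noting: np.convolve with its default mode='full' pads the rolling average at both ends rather than giving a clean 10-episode mean, and epsilon is scaled by 200 while the y-axis is capped at max_env_steps = 100, so early exploration values fall outside the visible range. An adjustment along the following lines (an editorial suggestion, not part of the commit) would keep both curves on the plot:

    rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')  # true 10-episode moving average
    eps_graph = [max_env_steps * x for x in epsilons]                       # scale epsilon into the 0..max_env_steps range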
trainer.py ADDED
@@ -0,0 +1,167 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+
+ import os
+ from os import mkdir
+ import gym
+ import tensorflow as tf
+ import tf_slim as slim
+ import numpy as np
+ import matplotlib as mpl
+ import matplotlib.pyplot as plt
+ import json
+ from tensorflow import keras
+ from ns3gym import ns3env
+ from DDQN import DoubleDeepQNetwork
+
+ jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
+ jammerType = jammerTypes[0]
+ network = 'FNN'
+ cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost
+
+ for csc in cscs:
+     env = gym.make('ns3-v0')
+     ob_space = env.observation_space
+     ac_space = env.action_space
+     print("Observation space: ", ob_space, ob_space.dtype)
+     print("Action space: ", ac_space, ac_space.n)
+
+     s_size = ob_space.shape[0]
+     a_size = ac_space.n
+     total_episodes = 200
+     max_env_steps = 100
+     train_end = 0
+     TRAIN_Episodes = 100
+     remaining_Episodes = 0
+     env._max_episode_steps = max_env_steps
+
+     epsilon = 1.0  # exploration rate
+     epsilon_min = 0.01
+     epsilon_decay = 0.999
+     discount_rate = 0.95
+     lr = 0.001
+     batch_size = 32
+
+     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
+     rewards = []  # Store rewards for graphing
+     epsilons = []  # Store the Explore/Exploit
+
+     # Training agent
+     for e in range(TRAIN_Episodes):
+         state = env.reset()
+         # print(f"Initial state is: {state}")
+         state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
+         tot_rewards = 0
+         previous_action = 0
+         for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
+             action = DDQN_agent.action(state)
+             next_state, reward, done, _ = env.step(action)
+             # print(f'The next state is: {next_state}')
+             # done: Three collisions occurred in the last 10 steps.
+             # time == max_env_steps - 1 : No collisions occurred
+             if done or time == max_env_steps - 1:
+                 rewards.append(tot_rewards)
+                 epsilons.append(DDQN_agent.epsilon)
+                 print("episode: {}/{}, score: {}, e: {}"
+                       .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
+                 break
+             # Applying channel switching cost
+             if action != previous_action:
+                 reward -= csc
+             next_state = np.reshape(next_state, [1, s_size])
+             tot_rewards += reward
+             DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
+             state = next_state
+             previous_action = action
+
+             # Experience Replay
+             if len(DDQN_agent.memory) > batch_size:
+                 DDQN_agent.experience_replay(batch_size)
+         # Update the weights after each episode (You can configure this for x steps as well)
+         DDQN_agent.update_target_from_model()
+         # If our current NN passes we are done
+         # Early stopping criteria: the average of the last 10 runs is within 10% of the max
+         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
+             # Set the rest of the episodes for testing
+             remaining_Episodes = total_episodes - e
+             train_end = e
+             break
+
+     # Testing
+     print('Training complete. Testing started...')
+     # TEST Time
+     # In this section we ALWAYS use exploit as we don't train anymore
+     total_transmissions = 0
+     successful_transmissions = 0
+     if remaining_Episodes == 0:
+         train_end = TRAIN_Episodes
+         TEST_Episodes = 100
+     else:
+         TEST_Episodes = total_episodes - train_end
+     # Testing Loop
+     n_channel_switches = 0
+     for e_test in range(TEST_Episodes):
+         state = env.reset()
+         state = np.reshape(state, [1, s_size])
+         tot_rewards = 0
+         previous_channel = 0
+         for t_test in range(max_env_steps):
+             action = DDQN_agent.test_action(state)
+             next_state, reward, done, _ = env.step(action)
+             if done or t_test == max_env_steps - 1:
+                 rewards.append(tot_rewards)
+                 epsilons.append(0)  # We are doing full exploit
+                 print("episode: {}/{}, score: {}, e: {}"
+                       .format(e_test, TEST_Episodes, tot_rewards, 0))
+                 break
+             next_state = np.reshape(next_state, [1, s_size])
+             tot_rewards += reward
+             if action != previous_channel:
+                 n_channel_switches += 1
+             if reward == 1:
+                 successful_transmissions += 1
+             # DON'T STORE ANYTHING DURING TESTING
+             state = next_state
+             previous_channel = action
+             # done: More than 3 collisions occurred in the last 10 steps.
+             # t_test == max_env_steps - 1: No collisions occurred
+             total_transmissions += 1
+
+     # Plotting
+     plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
+     rolling_average = np.convolve(rewards, np.ones(10) / 10)
+     plt.plot(rewards)
+     plt.plot(rolling_average, color='black')
+     plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
+     # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
+     eps_graph = [200 * x for x in epsilons]
+     plt.plot(eps_graph, color='g', linestyle='-')
+     # Plot the line where TESTING begins
+     plt.axvline(x=train_end, color='y', linestyle='-')
+     plt.xlim((0, train_end + TEST_Episodes))
+     plt.ylim((0, max_env_steps))
+     plt.xlabel('Episodes')
+     plt.ylabel('Rewards')
+     plt.savefig(plotName, bbox_inches='tight')
+     # plt.show()
+
+     # Save Results
+     # Rewards
+     fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
+     with open(fileName, 'w') as f:
+         json.dump(rewards, f)
+     # Normalized throughput
+     normalizedThroughput = successful_transmissions / (TEST_Episodes * (max_env_steps - 2))
+     print(f'The normalized throughput is: {normalizedThroughput}')
+     fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
+     with open(fileName, 'w') as f:
+         json.dump(normalizedThroughput, f)
+     # Channel switching times
+     normalized_cst = n_channel_switches / (TEST_Episodes * (max_env_steps - 2))
+     print(f'The normalized channel switching times is: {normalized_cst}')
+     fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
+     with open(fileName, 'w') as f:
+         json.dump(normalized_cst, f)
+     # Save the agent as a SavedAgent.
+     agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
+     DDQN_agent.save_model(agentName)
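
Both trainer.py and tester.py import os and mkdir but never call them, while plt.savefig, json.dump, and save_model all write under results/FNN/ and savedAgents/FNN/. Assuming those directories are not already present in the working tree, creating them up front avoids a FileNotFoundError on the first save (a sketch of the assumed layout, not part of the commit):

    os.makedirs(f'results/{network}', exist_ok=True)      # plots, rewards, throughput, switching-time JSON files
    os.makedirs(f'savedAgents/{network}', exist_ok=True)  # saved DDQN agents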