Creating trainer and tester modules
- DDQN_FNN.py → DDQN.py +0 -0
- antiJamEnv.py +97 -0
- antiJamming_v1.py +0 -139
- tester.py +167 -0
- trainer.py +167 -0
DDQN_FNN.py → DDQN.py
RENAMED
File without changes
antiJamEnv.py
ADDED
@@ -0,0 +1,97 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env
import gym
from gym import spaces
import numpy as np


class AntiJamEnv(gym.Env):
    def __init__(self):
        super(AntiJamEnv, self).__init__()

        self.num_channels = 8
        self.channel_bandwidth = 20  # MHz
        self.frequency_range = [5180, 5320]  # MHz

        self.observation_space = spaces.Box(low=-30, high=40, shape=(self.num_channels,), dtype=np.float32)
        self.action_space = spaces.Discrete(self.num_channels)

        self.current_channel = np.random.randint(self.num_channels)
        self.jammer_modes = ['constant', 'random', 'sweeping']
        self.jammer_mode = np.random.choice(self.jammer_modes)
        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])

    def _get_received_power(self, channel_idx):
        # Simulate received jamming power using normal distribution
        jammed_power = np.random.normal(loc=30, scale=5)
        adjacent_power = np.random.normal(loc=13, scale=3)
        far_away_power = np.random.normal(loc=-7, scale=1)

        if channel_idx == self.current_channel:
            return jammed_power
        elif abs(channel_idx - self.current_channel) == 1:
            return adjacent_power
        elif abs(channel_idx - self.current_channel) >= 3:
            return far_away_power
        else:
            return -30  # Unjammed

    def step(self, action):
        assert self.action_space.contains(action), "Invalid action"

        received_power = self._get_received_power(action)
        if received_power >= 0:
            reward = 1.0
        else:
            reward = 0.0

        if self.current_channel != action:
            reward *= 0.9  # Channel switching cost

        self.current_channel = action

        if self.jammer_mode == 'random':
            self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
        elif self.jammer_mode == 'sweeping':
            self.jammer_frequency += self.channel_bandwidth
            if self.jammer_frequency > self.frequency_range[1]:
                self.jammer_frequency = self.frequency_range[0]

        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])

        return self.observation, reward, False, {}

    def reset(self):
        self.current_channel = np.random.randint(self.num_channels)
        self.jammer_mode = np.random.choice(self.jammer_modes)
        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])

        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
        return self.observation

    def render(self, mode='human'):
        pass

    def close(self):
        pass


# Test the environment
env = AntiJamEnv()
observation = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    observation, reward, done, _ = env.step(action)
    print("Action:", action, "Reward:", reward, "Observation:", observation)
    if done:
        break
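A note on usage (not part of the commit): AntiJamEnv.step() always returns done=False, so episodes never end on their own and a step limit has to be imposed from outside. Below is a minimal sketch of doing that with gym's TimeLimit wrapper under a random policy; the import path and the wrapper's availability in the installed gym version are assumptions.

from gym.wrappers import TimeLimit
from antiJamEnv import AntiJamEnv  # assumed import path; the module-level test loop in antiJamEnv.py also runs on import

wrapped = TimeLimit(AntiJamEnv(), max_episode_steps=100)  # force 100-step episodes

obs = wrapped.reset()
done, episode_return = False, 0.0
while not done:
    action = wrapped.action_space.sample()          # random policy, purely to exercise the API
    obs, reward, done, info = wrapped.step(action)
    episode_return += reward
print("Episode return under a random policy:", episode_return)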
antiJamming_v1.py
DELETED
@@ -1,139 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env

env = gym.make('ns3-v0')
ob_space = env.observation_space
ac_space = env.action_space
print("Observation space: ", ob_space, ob_space.dtype)
print("Action space: ", ac_space, ac_space.n)

s_size = ob_space.shape[0]
a_size = ac_space.n
jammerType = 'combined'

model = keras.Sequential()
model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(a_size, activation='softmax'))
model.compile(optimizer=tf.optimizers.Adam(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

total_episodes = 1
max_env_steps = 1
env._max_episode_steps = max_env_steps

epsilon = 1.0  # exploration rate
epsilon_min = 0.01
epsilon_decay = 0.99

time_history = []
rew_history = []

# Training agent
for e in range(total_episodes):

    state = env.reset()
    state = np.reshape(state, [1, s_size])
    rewardsum = 0
    for time in range(max_env_steps):
        # Choose action
        if np.random.rand(1) < epsilon:
            action = np.random.randint(a_size)
        else:
            action = np.argmax(model.predict(state)[0])

        # Step
        next_state, reward, done, _ = env.step(action)

        if done or time == max_env_steps - 1:
            print("episode: {}/{}, time: {}, rew: {}, eps: {:.2}"
                  .format(e, total_episodes, time, rewardsum, epsilon))
            break

        next_state = np.reshape(next_state, [1, s_size])

        # Train
        target = reward
        if not done:
            target = (reward + 0.95 * np.amax(model.predict(next_state)[0]))

        target_f = model.predict(state)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)

        state = next_state
        rewardsum += reward
        if epsilon > epsilon_min: epsilon *= epsilon_decay

    time_history.append(time)
    rew_history.append(rewardsum)
    # Implementing early break

# Plotting Learning Performance
print("Plot Learning Performance")
mpl.rcdefaults()
mpl.rcParams.update({'font.size': 16})

fig, ax = plt.subplots(figsize=(10, 4))
plt.grid(True, linestyle='--')
plt.title('Learning Performance')
plt.plot(range(len(time_history)), time_history, label='Steps', marker="^", linestyle=":")  # , color='red')
plt.plot(range(len(rew_history)), rew_history, label='Reward', marker="", linestyle="-")  # , color='k')
plt.xlabel('Episode')
plt.ylabel('Time')
plt.legend(prop={'size': 12})

plt.savefig('learning.pdf', bbox_inches='tight')
plt.show()

# for n in range(2 ** s_size):
#     state = [n >> i & 1 for i in range(0, 2)]
#     state = np.reshape(state, [1, s_size])
#     print("state " + str(state)
#           + " -> prediction " + str(model.predict(state)[0])
#           )

# Testing agent
n_runs = 1
total_trans_pkts = 0

for run in range(n_runs):
    state = env.reset()
    state = np.reshape(state, [1, s_size])
    total_trans_pkts_per_run = 0
    for time in range(max_env_steps):
        # Choose Channel
        action = np.argmax(model.predict(state)[0])
        # Step
        next_state, reward, done, _ = env.step(action)
        total_trans_pkts_per_run += reward
        if done or time == max_env_steps - 1:
            break
        next_state = np.reshape(next_state, [1, s_size])
        # Test
        state = next_state

    print(f"Run: {run}/{n_runs}, Total transferred packets: {total_trans_pkts_per_run}")
    total_trans_pkts += total_trans_pkts_per_run

# print(model.get_config())
# print(model.to_json())
# print(model.get_weights())

# Save Results for this time slots value
normalizedThroughput = total_trans_pkts / (100 * n_runs)
print(f'The normalized throughput is: {normalizedThroughput}')
filename = f'{jammerType}_timeSlots_{max_env_steps}.json'
with open(filename, 'w') as f:
    json.dump(normalizedThroughput, f)
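For reference, the deleted script bootstrapped its training target from the same network it was updating, i.e. target = reward + 0.95 * max_a Q(state', a). A double DQN, which the renamed DoubleDeepQNetwork module presumably provides, instead selects the next action with the online network and evaluates it with a separate target network. A minimal sketch of that target computation, using illustrative names not taken from DDQN.py:

import numpy as np

def double_dqn_target(online_model, target_model, reward, next_state, done, gamma=0.95):
    # next_state is assumed to already be reshaped to (1, s_size), as in the loops above.
    if done:
        return reward
    next_q_online = online_model.predict(next_state)[0]   # online net selects the action
    next_q_target = target_model.predict(next_state)[0]   # target net evaluates it
    best_action = np.argmax(next_q_online)
    return reward + gamma * next_q_target[best_action]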
tester.py
ADDED
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
from os import mkdir
import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost

for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps

    epsilon = 1.0  # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32

    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    rewards = []  # Store rewards for graphing
    epsilons = []  # Store the Explore/Exploit

    # Training agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: Three collisions occurred in the last 10 steps.
            # time == max_env_steps - 1 : No collisions occurred
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Applying channel switching cost
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
            state = next_state
            previous_action = action

            # Experience Replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)
        # Update the weights after each episode (You can configure this for x steps as well
        DDQN_agent.update_target_from_model()
        # If our current NN passes we are done
        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Set the rest of the episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break

    # Testing
    print('Training complete. Testing started...')
    # TEST Time
    # In this section we ALWAYS use exploit as we don't train anymore
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end
    # Testing Loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # We are doing full exploit
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # DON'T STORE ANYTHING DURING TESTING
            state = next_state
            previous_channel = action
            # done: More than 3 collisions occurred in the last 10 steps.
            # t_test == max_env_steps - 1: No collisions occurred
            total_transmissions += 1

    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
    eps_graph = [200 * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Plot the line where TESTING begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end+TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    # plt.show()

    # Save Results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)
    # Channel switching times
    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized channel switching times is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)
    # Save the agent as a SavedAgent.
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
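tester.py (and trainer.py below) touch the agent only through a small surface: the constructor, the epsilon attribute, the memory buffer, action() and test_action(), store(), experience_replay(), update_target_from_model(), and save_model(). The stub below sketches that interface, inferred solely from the calls above; the real implementation in the renamed DDQN.py will differ in its internals.

from collections import deque
import numpy as np

class DoubleDeepQNetworkStub:  # illustrative stand-in, not the shipped class
    def __init__(self, s_size, a_size, lr, gamma, epsilon, epsilon_min, epsilon_decay):
        self.epsilon = epsilon             # read by the training loop for logging
        self.memory = deque(maxlen=10000)  # len(memory) is compared against batch_size
        self.a_size = a_size

    def action(self, state):               # epsilon-greedy choice during training
        return np.random.randint(self.a_size)

    def test_action(self, state):          # greedy choice during testing
        return np.random.randint(self.a_size)

    def store(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self, batch_size):
        pass                               # sample from memory and fit the online network

    def update_target_from_model(self):
        pass                               # copy online-network weights into the target network

    def save_model(self, path):
        pass                               # persist the trained agent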
trainer.py
ADDED
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
from os import mkdir
import gym
import tensorflow as tf
import tf_slim as slim
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from tensorflow import keras
from ns3gym import ns3env
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost

for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps

    epsilon = 1.0  # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32

    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    rewards = []  # Store rewards for graphing
    epsilons = []  # Store the Explore/Exploit

    # Training agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: Three collisions occurred in the last 10 steps.
            # time == max_env_steps - 1 : No collisions occurred
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Applying channel switching cost
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
            state = next_state
            previous_action = action

            # Experience Replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)
        # Update the weights after each episode (You can configure this for x steps as well
        DDQN_agent.update_target_from_model()
        # If our current NN passes we are done
        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Set the rest of the episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break

    # Testing
    print('Training complete. Testing started...')
    # TEST Time
    # In this section we ALWAYS use exploit as we don't train anymore
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end
    # Testing Loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # We are doing full exploit
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # DON'T STORE ANYTHING DURING TESTING
            state = next_state
            previous_channel = action
            # done: More than 3 collisions occurred in the last 10 steps.
            # t_test == max_env_steps - 1: No collisions occurred
            total_transmissions += 1

    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
    eps_graph = [200 * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Plot the line where TESTING begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end+TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    # plt.show()

    # Save Results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)
    # Channel switching times
    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized channel switching times is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)
    # Save the agent as a SavedAgent.
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
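Two operational assumptions worth flagging, sketched below rather than folded into the scripts: the plots, JSON results, and saved agents are written under results/FNN/ and savedAgents/FNN/, which must already exist when plt.savefig and open() are called (both scripts import os and mkdir without using them); and both scripts still import DoubleDeepQNetwork from DDQN_FNN even though this commit renames that file to DDQN.py.

import os

network = 'FNN'
for d in (f'results/{network}', f'savedAgents/{network}'):
    os.makedirs(d, exist_ok=True)  # no-op if the directory already exists

try:
    from DDQN import DoubleDeepQNetwork      # new module name after the rename
except ImportError:
    from DDQN_FNN import DoubleDeepQNetwork  # fall back to the old name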