~enan/ros-rl

106ef77eda78e37aa6c39e67612741291730e71c — Enan Ajmain 1 year, 9 months ago 570cf55
Non-technical changes
5 files changed, 11 insertions(+), 76 deletions(-)

M src/ddpg.py
M src/envs.py
M src/qlearning.py
M src/td3.py
M src/util.py
M src/ddpg.py => src/ddpg.py +0 -7
@@ 20,13 20,6 @@ from util import interrupt_handler
from util import ReplayBuffer, ActionNormalizer
from util import ActionNoise, NormalActionNoise, OUNoise

import rospy
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Twist
from std_srvs.srv import Empty

import signal
import random
from typing import List, Tuple

M src/envs.py => src/envs.py +4 -10
@@ 1,11 1,6 @@
import signal
import sys
import time
import gym
import numpy as np
import cv2 as cv



import rospy
from sensor_msgs.msg import Image


@@ 13,8 8,7 @@ from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Twist
from std_srvs.srv import Empty


from util import _process_image
from util import process_image

import gym
import numpy as np


@@ 45,8 39,8 @@ class GazeboAutoVehicleEnv(gym.Env):
                                               np.array([1]),
                                               dtype=np.float32)
        else:
            self.action_space = gym.spaces.Box(np.array([-1, 1.0]),
                                               np.array([1, 1.5]),
            self.action_space = gym.spaces.Box(np.array([-1, 0.3]),
                                               np.array([1, 0.7]),
                                               dtype=np.float32)

        self.observation_space = gym.spaces.Box(np.array([-1]),


@@ 66,7 60,7 @@ class GazeboAutoVehicleEnv(gym.Env):
        self.state = None

    def image_callback(self, img):
        self.state = _process_image(img, False)
        self.state = process_image(img, False)
        pass

    def modelstate_callback(self, states):

M src/qlearning.py => src/qlearning.py +2 -11
@@ 2,13 2,10 @@ import gym
import random
import time
import signal
import sys

import numpy as np
import cv2 as cv

import rospy
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Twist


@@ 17,13 14,7 @@ from std_srvs.srv import Empty
import numpy as np
import matplotlib.pyplot as plt

from util import _process_image, interuppt_handler


def interuppt_handler(signum, frame):
    print("Signal handler!!!")
    sys.exit(-2) #Terminate process here as catching the signal removes the close process behaviour of Ctrl-C

from util import process_image, interuppt_handler


def simulate(env, q_table, alpha, epsilon, epsilon_decay):


@@ 124,7 115,7 @@ class GazeboAutoVehicleEnv():
        # self.unpause = rospy.ServiceProxy(self.GZUNPAUSE_TOPIC, Empty)

    def image_callback(self, img):
        state = _process_image(img, True)
        state = process_image(img, True)
        if state == None:
            self.state = None
            return

M src/td3.py => src/td3.py +1 -43
@@ 8,13 8,6 @@ from util import interrupt_handler
from util import ReplayBuffer, ActionNormalizer
from util import ActionNoise, NormalActionNoise, OUNoise

import rospy
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Twist
from std_srvs.srv import Empty

import signal
import random
from typing import List, Tuple


@@ 332,7 325,6 @@ class TD3Agent():
        actor_losses: List[float],
        critic_losses: List[float],
    ):

        fig, ax = plt.subplots(3, 1, figsize=(30, 15))
        ax[0].plot(scores)
        ax[0].plot(avgscores)


@@ 348,41 340,6 @@ class TD3Agent():
        print("Loading model . . .")
        self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))

"""## Environment
*ActionNormalizer* is an action wrapper class to normalize the action values
ranged in (-1. 1). Thanks to this class, we can make the agent simply select
action values within the zero centered range (-1, 1).
"""

class ActionNormalizer(gym.ActionWrapper):
    """Rescale and relocate the actions."""

    def action(self, action: np.ndarray) -> np.ndarray:
        """Change the range (-1, 1) to (low, high)."""
        low = self.action_space.low
        high = self.action_space.high

        scale_factor = (high - low) / 2
        reloc_factor = high - scale_factor

        action = action * scale_factor + reloc_factor
        action = np.clip(action, low, high)

        return action

    def reverse_action(self, action: np.ndarray) -> np.ndarray:
        """Change the range (low, high) to (-1, 1)."""
        low = self.action_space.low
        high = self.action_space.high

        scale_factor = (high - low) / 2
        reloc_factor = high - scale_factor

        action = (action - reloc_factor) / scale_factor
        action = np.clip(action, -1.0, 1.0)

        return action


signal.signal(signal.SIGINT, interrupt_handler)
env = GazeboAutoVehicleEnv(600, 800, 2, 1)


@@ 411,4 368,5 @@ agent.train(num_frames = 50000)
# model_filename += "_7"
# agent.load(directory="./saves", filename=model_filename)
# while True:
#     env.use_pause = False
#     agent.test()

M src/util.py => src/util.py +4 -5
@@ 1,20 1,19 @@
import signal
import sys
import random
import numpy as np
from typing import Dict
import copy

import gym
import cv2 as cv
from cv_bridge import CvBridge, CvBridgeError
from cv_bridge import CvBridge


def interrupt_handler(signum, frame):
    print("Signal handler!!!")
    print("Signal handler!!!", signum, frame)
    sys.exit(-2)


def _process_image(img, show_image=True):
def process_image(img, show_image=True):
    bridge = CvBridge()
    image = bridge.imgmsg_to_cv2(img, "bgr8")
    gray = bridge.imgmsg_to_cv2(img, "mono8")