M src/ddpg.py => src/ddpg.py +0 -7
@@ 20,13 20,6 @@ from util import interrupt_handler
from util import ReplayBuffer, ActionNormalizer
from util import ActionNoise, NormalActionNoise, OUNoise
-import rospy
-from cv_bridge import CvBridge, CvBridgeError
-from sensor_msgs.msg import Image
-from gazebo_msgs.msg import ModelStates
-from geometry_msgs.msg import Twist
-from std_srvs.srv import Empty
-
import signal
import random
from typing import List, Tuple
M src/envs.py => src/envs.py +4 -10
@@ 1,11 1,6 @@
-import signal
-import sys
import time
import gym
import numpy as np
-import cv2 as cv
-
-
import rospy
from sensor_msgs.msg import Image
@@ 13,8 8,7 @@ from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Twist
from std_srvs.srv import Empty
-
-from util import _process_image
+from util import process_image
import gym
import numpy as np
@@ 45,8 39,8 @@ class GazeboAutoVehicleEnv(gym.Env):
np.array([1]),
dtype=np.float32)
else:
- self.action_space = gym.spaces.Box(np.array([-1, 1.0]),
- np.array([1, 1.5]),
+ self.action_space = gym.spaces.Box(np.array([-1, 0.3]),
+ np.array([1, 0.7]),
dtype=np.float32)
self.observation_space = gym.spaces.Box(np.array([-1]),
@@ 66,7 60,7 @@ class GazeboAutoVehicleEnv(gym.Env):
self.state = None
def image_callback(self, img):
- self.state = _process_image(img, False)
+ self.state = process_image(img, False)
pass
def modelstate_callback(self, states):
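
Note on the new bounds: if ActionNormalizer (imported from util in ddpg.py and td3.py) wraps this env, a zero-centered policy output in (-1, 1) is rescaled linearly into these limits using the same formula as the duplicate class deleted from td3.py below. A minimal sketch of that mapping under the new low=[-1, 0.3], high=[1, 0.7] bounds; the sample values are illustrative, not from this change:

    import numpy as np

    low, high = np.array([-1.0, 0.3]), np.array([1.0, 0.7])
    scale_factor = (high - low) / 2      # [1.0, 0.2]
    reloc_factor = high - scale_factor   # [0.0, 0.5]

    raw = np.array([0.5, -1.0])          # zero-centered policy output
    env_action = np.clip(raw * scale_factor + reloc_factor, low, high)
    # -> steering 0.5, velocity 0.3 (the new minimum forward speed)
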
M src/qlearning.py => src/qlearning.py +2 -11
@@ 2,13 2,10 @@ import gym
import random
import time
import signal
-import sys
import numpy as np
-import cv2 as cv
import rospy
-from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Twist
@@ 17,13 14,7 @@ from std_srvs.srv import Empty
import numpy as np
import matplotlib.pyplot as plt
-from util import _process_image, interuppt_handler
-
-
-def interuppt_handler(signum, frame):
- print("Signal handler!!!")
- sys.exit(-2) #Terminate process here as catching the signal removes the close process behaviour of Ctrl-C
-
+from util import process_image, interrupt_handler
def simulate(env, q_table, alpha, epsilon, epsilon_decay):
@@ 124,7 115,7 @@ class GazeboAutoVehicleEnv():
# self.unpause = rospy.ServiceProxy(self.GZUNPAUSE_TOPIC, Empty)
def image_callback(self, img):
- state = _process_image(img, True)
+ state = process_image(img, True)
if state == None:
self.state = None
return
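
With the local handler removed, qlearning.py presumably registers the shared one the same way td3.py does further down in this diff; a minimal sketch, assuming that wiring:

    import signal
    from util import interrupt_handler

    # Ctrl-C now exits via sys.exit(-2) in the single handler defined in util.py
    signal.signal(signal.SIGINT, interrupt_handler)
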
M src/td3.py => src/td3.py +1 -43
@@ 8,13 8,6 @@ from util import interrupt_handler
from util import ReplayBuffer, ActionNormalizer
from util import ActionNoise, NormalActionNoise, OUNoise
-import rospy
-from cv_bridge import CvBridge, CvBridgeError
-from sensor_msgs.msg import Image
-from gazebo_msgs.msg import ModelStates
-from geometry_msgs.msg import Twist
-from std_srvs.srv import Empty
-
import signal
import random
from typing import List, Tuple
@@ 332,7 325,6 @@ class TD3Agent():
actor_losses: List[float],
critic_losses: List[float],
):
-
fig, ax = plt.subplots(3, 1, figsize=(30, 15))
ax[0].plot(scores)
ax[0].plot(avgscores)
@@ 348,41 340,6 @@ class TD3Agent():
print("Loading model . . .")
self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))
-"""## Environment
-*ActionNormalizer* is an action wrapper class to normalize the action values
-ranged in (-1. 1). Thanks to this class, we can make the agent simply select
-action values within the zero centered range (-1, 1).
-"""
-
-class ActionNormalizer(gym.ActionWrapper):
- """Rescale and relocate the actions."""
-
- def action(self, action: np.ndarray) -> np.ndarray:
- """Change the range (-1, 1) to (low, high)."""
- low = self.action_space.low
- high = self.action_space.high
-
- scale_factor = (high - low) / 2
- reloc_factor = high - scale_factor
-
- action = action * scale_factor + reloc_factor
- action = np.clip(action, low, high)
-
- return action
-
- def reverse_action(self, action: np.ndarray) -> np.ndarray:
- """Change the range (low, high) to (-1, 1)."""
- low = self.action_space.low
- high = self.action_space.high
-
- scale_factor = (high - low) / 2
- reloc_factor = high - scale_factor
-
- action = (action - reloc_factor) / scale_factor
- action = np.clip(action, -1.0, 1.0)
-
- return action
-
signal.signal(signal.SIGINT, interrupt_handler)
env = GazeboAutoVehicleEnv(600, 800, 2, 1)
@@ 411,4 368,5 @@ agent.train(num_frames = 50000)
# model_filename += "_7"
# agent.load(directory="./saves", filename=model_filename)
# while True:
+# env.use_pause = False
# agent.test()
M src/util.py => src/util.py +4 -5
@@ 1,20 1,19 @@
-import signal
import sys
+import random
import numpy as np
from typing import Dict
import copy
import gym
import cv2 as cv
-from cv_bridge import CvBridge, CvBridgeError
+from cv_bridge import CvBridge
def interrupt_handler(signum, frame):
- print("Signal handler!!!")
+ print("Signal handler!!!", signum, frame)
sys.exit(-2)
-
-def _process_image(img, show_image=True):
+def process_image(img, show_image=True):
bridge = CvBridge()
image = bridge.imgmsg_to_cv2(img, "bgr8")
gray = bridge.imgmsg_to_cv2(img, "mono8")