~melmon/chizuru-old

37eca43e7e26349297fda1b5ce8f030f40d738e7 — Melmon 9 months ago 8beec5a
Testing in progress ;)
2 files changed, 38 insertions(+), 18 deletions(-)

M chizuru.py
M writeup/Drescher-DGD-dissertation-2022-23.tex
M chizuru.py => chizuru.py +18 -10
@@ -41,7 +41,7 @@ HISTORY_LEN = 4
# Hyperparameters
GAMMA = 0.99
NUM_ITERATIONS = 20000
-MAX_TURNS_IN_EPISODE = 1250
+MAX_TURNS_IN_EPISODE = 1500
BATCH_SIZE = 32
BUFFER_SIZE = 100000
MIN_REPLAY_SIZE = 1500


@@ -267,8 +267,7 @@ def create_dueling_dqn(h, w) -> tf.keras.Model:

    final_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
-        loss=tf.keras.losses.MeanSquaredError(),
-        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
+        loss=tf.keras.losses.MeanSquaredError()
    )

    return final_model
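For context, create_dueling_dqn builds a network whose Q-values come from separate value and advantage streams, and Q-learning regresses those values onto continuous TD targets, which is why a plain MSE loss without classification metrics fits here. The sketch below is only an illustration of that pattern using the Keras functional API; the function name, layer sizes and stream widths are placeholders, not the actual chizuru.py architecture.

import tensorflow as tf

def dueling_dqn_sketch(h, w, n_actions, history_len=4, learning_rate=1e-5):
    """Illustrative dueling head: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)."""
    inputs = tf.keras.layers.Input(shape=(h, w, history_len))
    x = tf.keras.layers.Conv2D(32, 8, strides=4, activation='relu')(inputs)
    x = tf.keras.layers.Conv2D(64, 4, strides=2, activation='relu')(x)
    x = tf.keras.layers.Flatten()(x)

    # Separate value and advantage streams.
    value = tf.keras.layers.Dense(1)(tf.keras.layers.Dense(256, activation='relu')(x))
    advantage = tf.keras.layers.Dense(n_actions)(tf.keras.layers.Dense(256, activation='relu')(x))

    # Recombine; subtracting the mean advantage keeps the decomposition identifiable.
    q_values = tf.keras.layers.Lambda(
        lambda t: t[0] + t[1] - tf.reduce_mean(t[1], axis=1, keepdims=True)
    )([value, advantage])

    model = tf.keras.Model(inputs, q_values)
    # TD targets are regression targets, so MSE alone is appropriate.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.MeanSquaredError())
    return model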


@@ -298,18 +297,24 @@ if __name__ == "__main__":
    writer = tf.summary.create_file_writer(LOG_DIR)

    CONFIG = {
-        'width': 79, 'height': 21,
+        'width': 79,
+        'height': 21,
        'hide_dungeon': True,
        'dungeon': {
            'style': 'rogue',
-            'room_num_x': 3, 'room_num_y': 2
+            'room_num_x': 3,
+            'room_num_y': 2,
        },
-        'enemies': []
+        'enemies': {
+            'enemies': []
+        }
    }
    env = RogueEnv(max_steps=MAX_TURNS_IN_EPISODE, stair_reward=100.0, config_dict=CONFIG)
    episode_reward = 0
    intr = 0
    episode = 0
    all_rewards = []
+    interval_rewards = []
    all_losses = []
    env.reset()
    new_state, rew, done, _ = env.step('.')


@@ -325,6 +330,8 @@ if __name__ == "__main__":
                new_state, rew, done, _ = env.step(ACTIONS[act])
                episode_reward += rew
                all_rewards.append(rew)
+                interval_rewards.append(rew)
+                all_rewards = all_rewards[-10:]
                current_game_state = np.append(current_game_state[:, :, 1:], new_state.gray_image().reshape(21, 79, 1), axis=2)

                agent.replay_buffer.add_experience(act, new_state.gray_image()[0], rew, done)
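The np.append call above maintains a rolling window of the last HISTORY_LEN screens along the channel axis: the oldest channel is dropped and the newest 21x79 grayscale screen is appended. A self-contained sketch of the same idea (the shapes come from the surrounding code; the helper name is hypothetical):

import numpy as np

def push_frame(stack, frame, h=21, w=79):
    """Drop the oldest channel and append the newest screen as the last channel."""
    # stack: (h, w, history_len); frame: (h, w) grayscale screen
    return np.append(stack[:, :, 1:], frame.reshape(h, w, 1), axis=2)

history = np.zeros((21, 79, 4))
latest_screen = np.zeros((21, 79))      # stand-in for new_state.gray_image()[0]
history = push_frame(history, latest_screen)
assert history.shape == (21, 79, 4)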


@@ -333,14 +340,15 @@ if __name__ == "__main__":
                if step % LEARNING_FREQUENCY == 0 and agent.replay_buffer.count > MIN_REPLAY_SIZE:
                    loss, _ = agent.learn(BATCH_SIZE, GAMMA, epsilon, PRIORITY_SCALE)
                    all_losses.append(loss)
+                    all_losses = all_losses[-100:]
                    tf.summary.scalar('Loss', loss, step)

                if step % TARGET_UPDATE_FREQUENCY == 0 and step > MIN_REPLAY_SIZE:
                    agent.update_target_network()

                if step % 10 == 0:
-                    tf.summary.scalar('Reward', np.mean(all_rewards[-10:]), step)
-                    tf.summary.scalar('Losses', np.mean(all_losses[-100:]), step)
+                    tf.summary.scalar('Reward', np.mean(all_rewards), step)
+                    tf.summary.scalar('Losses', np.mean(all_losses), step)

                if done:
                    dlvl = new_state.dungeon_level
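Truncating all_rewards to its last 10 entries and all_losses to its last 100 at append time means the plain np.mean calls logged to TensorBoard above reproduce the old windowed means while keeping memory bounded. The same bookkeeping could also be written with collections.deque, which truncates automatically; this is an alternative sketch, not code from the repository:

import numpy as np
from collections import deque

# maxlen makes the containers self-truncating: appending past the limit discards the oldest entry.
recent_rewards = deque(maxlen=10)
recent_losses = deque(maxlen=100)

for r in [1.0, 0.0, 5.0]:
    recent_rewards.append(r)
recent_losses.append(0.05)

mean_reward = np.mean(recent_rewards)   # matches np.mean(all_rewards) after truncation
mean_loss = np.mean(recent_losses)      # matches np.mean(all_losses) after truncation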


@@ -358,8 +366,8 @@ if __name__ == "__main__":
                if step % STEPS_PER_INTERVAL == 0 and step > 0:
                    print('\nInterval', intr)
                    agent.save(intr)
-                    tf.summary.scalar('Interval score', np.mean(all_rewards), intr)
-                    all_rewards = []
+                    tf.summary.scalar('Interval score', np.mean(interval_rewards), intr)
+                    interval_rewards = []
                    intr += 1

    except KeyboardInterrupt:

M writeup/Drescher-DGD-dissertation-2022-23.tex => writeup/Drescher-DGD-dissertation-2022-23.tex +20 -8
@@ -352,7 +352,7 @@
    \section{Agent Training and Results}\label{sec:agent-training-and-results}  % TODO things here after data collection
    The agent was trained and evaluated on multiple Nvidia GeForce RTX 2080 graphics cards using CUDA.

-    Much of our code was inspired by the work of~\citet{sebtheiler}.
+    Our training code was adapted from the work of~\citet{sebtheiler}.

    During our training of the agent, we measured the agent's performance with the following criteria after every run:



@@ -411,14 +411,9 @@
    \section{Methods}\label{sec:methods}

    \subsection{Neural Network}\label{subsec:neural-network2}
-%    \begin{lstlisting}[label={lst:thing}]
-%        if __name__ == "__main__":
-%            print("Hello, world!")
-%    \end{lstlisting}

    \subsection{State Representation}\label{subsec:state-representation}
-    The state of the game is represented as a 21x79 grid of ASCII characters as displayed to a human player, an example
-    of which is shown in Figure~\ref{fig:rogsc}.
+    The state of the game is converted from the 21x79 grid of ASCII characters displayed to a human player into a
+    21x79 grid of numbers, one per character, using rogue-gym's \texttt{state.gray\_image()} function.
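On the chizuru.py side of this commit, the conversion described here appears as new_state.gray_image()[0] reshaped to 21x79x1 before being fed to the network. A minimal sketch of that flow; the (1, 21, 79) return shape is inferred from the indexing in chizuru.py rather than taken from rogue-gym's documentation:

import numpy as np

def screen_to_frame(state):
    """Turn a rogue-gym state into a single-channel 21x79 array of numeric cell codes."""
    gray = np.asarray(state.gray_image())   # assumed shape: (1, 21, 79)
    return gray[0].reshape(21, 79, 1)       # one channel per screen, ready for frame stacking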

    \subsection{Reward Representation}\label{subsec:reward-representation}
    The reward signals are as follows


@@ -461,6 +456,23 @@ LEARNING_FREQUENCY = 75
TARGET_UPDATE_FREQUENCY = 750
    \end{lstlisting}

+    \subsubsection{Dueling DQN/Prioritised Experience Replay}
+    \begin{lstlisting}[label={lst:ddqnperhyperparameters}]
+GAMMA = 0.99
+NUM_ITERATIONS = 20000
+MAX_TURNS_IN_EPISODE = 1250
+BATCH_SIZE = 32
+BUFFER_SIZE = 100000
+MIN_REPLAY_SIZE = 1500
+EPSILON_START = 1.0
+EPSILON_END = 0.01
+EPSILON_DECAY = 150000
+LEARNING_RATE = 0.00001
+LEARNING_FREQUENCY = 75
+TARGET_UPDATE_FREQUENCY = 750
+PRIORITY_SCALE = 0.7
+    \end{lstlisting}
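The three epsilon constants above are typically combined into a linear annealing schedule for the exploration rate: start at EPSILON_START, decay towards EPSILON_END over EPSILON_DECAY steps, then hold. The formulation below is a sketch of that common convention, not code taken from chizuru.py:

def epsilon_for_step(step, start=1.0, end=0.01, decay_steps=150_000):
    """Linearly anneal epsilon from `start` to `end` over `decay_steps`, then hold at `end`."""
    fraction = min(step / decay_steps, 1.0)
    return start + fraction * (end - start)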

    \subsection{Network Architecture}\label{subsec:network-architecture}
    \subsubsection{Dueling DQN}
    \begin{lstlisting}[label={lst:dueling}]