@@ 2,6 2,7 @@
"""This file contains the Chizuru class, a Rogue playing agent."""
import os
+
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
@@ 53,11 54,9 @@ def create_model():
map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(map_net)
crop_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=9 * 9)(crop_input)
- crop_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(9, 9))(crop_net)
- crop_net = tf.keras.layers.MaxPooling2D((2, 2))(crop_net)
- crop_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(crop_net)
+ crop_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(crop_net)
crop_net = tf.keras.layers.MaxPooling2D((2, 2))(crop_net)
- crop_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(crop_net)
+ crop_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu")(crop_net)
collected = tf.keras.layers.Concatenate()([status_net, inv_net, equip_net, map_net, crop_net])
@@ 75,7 74,7 @@ def create_model():
output = tf.keras.layers.Dense(10)(final_mlp)
- final_model = keras.Model(
+ final_model = tf.keras.Model(
inputs=[status_input,
inv_input,
equip_input,
@@ 93,25 92,20 @@ def create_model():
return final_model
-def get_crop():
+def get_crop(level_map):
pass
-def save_checkpoint(model_sv: tf.keras.Model):
- pass
+def save_checkpoint(model_sv: tf.keras.Model, epoch):
+ model_sv.save_weights(CKPT_PATH.format(epoch=epoch))
def load_checkpoint(model_ld: tf.keras.Model):
model_ld.load_weights(CKPT_PATH)
-def save_agent(model_sv: tf.keras.Model, filename="czr.h5"):
- pass
-
-
-def load_agent(filename="czr.h5"):
- pass
-
-
if __name__ == "__main__":
model = create_model()
+ tf.keras.utils.plot_model(model, "stuff.png", show_shapes=True)
+
# †Ascension†
@@ 1,10 1,15 @@
-\documentclass[11pt,a4paper]{article}
+\documentclass[12pt,a4paper]{article}
\usepackage{natbib}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{appendix}
+\usepackage{listings}
+
\graphicspath{ {./img/} }
+\lstset{language=Python}
+\setlength{\parskip}{3pt plus2pt minus2pt}
+\setlength{\parindent}{12pt}
\begin{document}
\title{chizuru-rogue: Deep Learning for Dungeon Crawling}
@@ 12,12 17,6 @@
\date{2022 - 2023}
\maketitle
- \begin{abstract}
- In this article we introduce chizuru4rogue, which is a computer program designed to play the video game Rogue, a famous role-playing game that inspired the creation of the ``roguelike'' video game genre. Rogue offers a unique problem to solve, requiring a player to solve a partially observable, randomly generated levels. chizuru-rouge utilises a *** to explore levels in Rogue, collect gold and reach the goal.
-
- TensorFlow will be used as a framework to implement the reinforcement learning agent. TensorFlow is a Python library that provides tools to streamline development of deep learning models.
- \end{abstract}
-
\setcounter{page}{0}
\thispagestyle{empty}
@@ 32,11 31,30 @@
\newpage
+ CHIZURU-ROGUE
+
+ submitted by Dylan G. Drescher
+
\section*{Copyright}
- Attention is drawn to the fact that copyright of this dissertation rests with its author. The Intellectual Property Rights of the products produced as part of the project belong to the author unless otherwise specified below, in accordance with the University of Bath's policy on intellectual property (see \href{https://www.bath.ac.uk/publications/university-ordinances/attachments/Ordinances_1_October_2020.pdf}{here}). This copy of the dissertation has been supplied on condition that anyone who consults it is understood to recognise that its copyright rests with its author and that no quotation from the dissertation and no information derived from it may be published without the prior written consent of the author.
+ Attention is drawn to the fact that copyright of this dissertation rests with its author.
+ The Intellectual Property Rights of the products produced as part of the project belong to the author unless otherwise specified below, in accordance with the University of Bath's policy on intellectual property (see \href{https://www.bath.ac.uk/publications/university-ordinances/attachments/Ordinances_1_October_2020.pdf}{here}).
+ This copy of the dissertation has been supplied on condition that anyone who consults it is understood to recognise that its copyright rests with its author and that no quotation from the dissertation and no information derived from it may be published without the prior written consent of the author.
\section*{Declaration}
- This dissertation is submitted to the University of Bath in accordance with the requirements of the degree of Bachelor of Science in the Department of Computer Science. No portion of the work in this dissertation has been submitted in support of an application for any other degree or qualification of this or any other university or institution of learning. Except where specifically acknowledged, it is the work of the author.
+ This dissertation is submitted to the University of Bath in accordance with the requirements of the degree of Bachelor of Science in the Department of Computer Science.
+ No portion of the work in this dissertation has been submitted in support of an application for any other degree or qualification of this or any other university or institution of learning.
+ Except where specifically acknowledged, it is the work of the author.
+
+ \newpage
+
+ \begin{abstract}
+ In this dissertation we introduce chizuru-rogue, a computer program designed to play the video game Rogue, the famous role-playing game that inspired the creation of the ``roguelike'' video game genre.
+ Rogue offers a unique problem to solve, requiring the player to navigate partially observable, randomly generated levels.
+ chizuru-rogue utilises a customised neural network incorporating an LSTM to explore levels in Rogue, collect gold and reach the goal.
+
+ The reinforcement learning agent will be implemented with TensorFlow, a Python library that provides tools to streamline the development of deep learning models.
+ \end{abstract}
\newpage
@@ 73,12 91,14 @@
\pagenumbering{arabic}
- \section{Introduction}
+ \section{Introduction}\label{sec:introduction}
TODO: Introduction goes here.
- \section{Literature Review}
- \subsection{Fundamentals}
- The fundamentals of reinforcement learning and many fundamental algorithms is explained in detail by~\cite{sutton18}.
+ \section{Literature, Technology and Data Review}\label{sec:literature-technology-and-data-review}
+
+ \subsection{Fundamentals}\label{subsec:fundamentals}
+
+ The fundamentals of reinforcement learning and many of its core algorithms are explained in detail by~\citet{sutton18}.
The core idea behind reinforcement learning algorithms is that an agent performs \emph{actions} on an \emph{environment} by deriving what it should do from its \emph{policy}, which is a mapping from states to actions.
Once the agent performs an action, it receives the new game state as well as a \emph{reward} signal, telling the agent how good its choice was.
@@ 86,72 106,79 @@
This function tells the agent either how profitable being in a certain state and following its policy is, or how profitable taking a certain action then following its policy is.
The theory is that the agent should aim to maximise its reward over the long term.
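+ Formally, the value of a state $s$ under a policy $\pi$ is the expected discounted return
+ \[
+     V^{\pi}(s) = E_{\pi}\left[ \sum_{k=0}^{\infty} \gamma^{k} r_{t+k+1} \mid s_t = s \right],
+ \]
+ where $\gamma \in [0, 1)$ is the discount factor that weighs immediate rewards against future ones.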
- One of the most well-known reinforcement learning algorithms is the Q-learning algorithm~\cite[Chapter~6.5]{sutton18}.
+ One of the most well-known reinforcement learning algorithms is the Q-learning algorithm~\citep[chap.~6.5]{sutton18}.
In this algorithm, the agent keeps track of a table mapping each state-action pair to its value.
When the agent reaches a certain state, it consults its Q-table to determine the most valuable action to take.
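+ Formally, after taking action $a$ in state $s$ and observing reward $r$ and next state $s'$, the table entry is updated as
+ \[
+     Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],
+ \]
+ where $\alpha$ is the learning rate and $\gamma$ is the discount factor.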
- \subsection{Deep Learning}
- While the Q-learning algorithm can solve simple problem domains sufficiently, when it comes to more complex domains that don't have fully observable states such as Atari games, the amount of resources it takes to run the algorithm can become extremely large. The way that \cite{mnih15} chose to solve this is to use a convolutional neural network to approximate the Q-learning action-value functions, through an algorithm the authors call ``Deep Q-network''.
+ \subsection{Deep Learning}\label{subsec:deep-learning}
+ While the Q-learning algorithm can solve simple problem domains sufficiently, in more complex domains with very large state spaces, such as Atari games played from screen images, the resources required to maintain the Q-table become impractically large.
+ \citet{mnih15} chose to solve this by using a convolutional neural network to approximate the Q-learning action-value function, through an algorithm the authors call the ``Deep Q-network'' (DQN).
The Deep Q-network in their writing was shown to play several Atari games to a superhuman level, most notably Video Pinball and Boxing.
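+ In a DQN, the Q-table of tabular Q-learning is replaced by a network $Q(s, a; \theta)$ whose weights $\theta$ are trained to minimise the loss
+ \[
+     L(\theta) = E\left[ \left( r + \gamma \max_{a'} Q(s', a'; \theta^{-}) - Q(s, a; \theta) \right)^{2} \right],
+ \]
+ where $\theta^{-}$ are the weights of a periodically updated target network and the transitions $(s, a, r, s')$ are drawn from an experience replay buffer.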
- A similar algorithm involving neural networks was employed by~\cite{silver16} in their development of the AlphaGo system, an agent that was found to beat human grandmaster players in the game of Go. The authors used a convolutional neural network alongside ``policy gradient'' reinforcement learning, where
+ A similar approach involving neural networks was employed by~\citet{silver16} in their development of the AlphaGo system, an agent that defeated professional human players in the game of Go. The authors used convolutional neural networks alongside ``policy gradient'' reinforcement learning, in which the policy network's weights are adjusted directly in the direction that makes actions leading to wins more probable.
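+ In its simplest (REINFORCE-style) form, this update follows the gradient
+ \[
+     \nabla_{\theta} J(\theta) = E_{\pi_{\theta}}\!\left[ \nabla_{\theta} \log \pi_{\theta}(a \mid s)\, R \right],
+ \]
+ so that actions followed by a higher return $R$ become more probable under the policy $\pi_{\theta}$.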
While the DQN algorithm by itself is serviceable for simpler problem domains such as Atari, there are better methods to tackle more challenging domains.
- When trying to create an agent that plays the online game Dota 2, \cite{berner19} used a Long Short-term Memory network.
+ When trying to create an agent that plays the online game Dota 2, \citet{berner19} used a Long Short-Term Memory (LSTM) network.
- LSTMs were first defined by~\cite{hochreiter97} and improved upon in later works.
+ LSTMs were first defined by~\citet{hochreiter97} and improved upon in later works.
An LSTM is an extension of the recurrent neural network, in which cells use gated feedback connections to allow the network to ``remember'' information over long time spans.
This addresses a limitation of plain feedforward and recurrent networks, which struggle to retain information that is useful to them in the long term.
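+ In its commonly used form (the forget gate was introduced in work following~\citet{hochreiter97}), an LSTM cell keeps a cell state $c_t$ alongside its hidden state $h_t$, updated at each time step as
+ \[
+     f_t = \sigma(W_f [h_{t-1}, x_t] + b_f), \quad
+     i_t = \sigma(W_i [h_{t-1}, x_t] + b_i), \quad
+     o_t = \sigma(W_o [h_{t-1}, x_t] + b_o),
+ \]
+ \[
+     c_t = f_t \odot c_{t-1} + i_t \odot \tanh(W_c [h_{t-1}, x_t] + b_c), \qquad
+     h_t = o_t \odot \tanh(c_t),
+ \]
+ where the forget, input and output gates $f_t$, $i_t$ and $o_t$ control what the cell discards, stores and exposes, which is what lets information persist over long sequences.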
- \subsection{Exploring Rogue}
- The first notable instance of a program being developed to play Rogue was by~\cite{mauldin83}, where they created ``ROG-O-MATIC'', an expert system that plays Rogue.
- An expert system, as stated by~\cite{jackson86} in their book's introduction, ``is a computing system capable of representing and reasoning about some knowledge-rich domain''.
+ \subsection{Exploring Rogue}\label{subsec:exploring-rogue}
+
+ The first notable instance of a program developed to play Rogue was ``ROG-O-MATIC'', an expert system created by~\citet{mauldin83}.
+ An expert system, as stated by~\citet{jackson86} in their book's introduction, ``is a computing system capable of representing and reasoning about some knowledge-rich domain''.
Essentially, these systems aim to emulate a human expert in a particular domain and their decision-making.
While expert systems are a form of artificial intelligence, they make no use of machine learning to learn and adapt to new situations: they follow the instructions programmed into them and are designed to rigidly solve a single problem domain.
ROG-O-MATIC provides a good yardstick against which to measure the performance of the agent we will be creating.
- An interface for machine learning agents to play Rogue has been created, called Rogueinabox (\cite{asperti17}).
+ An interface for machine learning agents to play Rogue has been created, called Rogueinabox~\citep{asperti17}.
Rogueinabox is a framework that allows developers to create agents that interface with the game Rogue.
In the Rogueinabox article, the authors ran a Deep Q-learning agent on the game for testing.
They simplified the problem domain to have the agent only consider exploring the dungeon to find the stairs, without fighting or collecting items.
Their agent performed reasonably well considering dungeon exploration alone; however, the aim of our agent is to reach the Amulet of Yendor and clear the game, which is difficult if the player does not fight monsters and grow stronger.
- The initial agent proposed in the original Rogueinabox paper was further improved upon (\cite{asperti18}).
+ The initial agent proposed in the original Rogueinabox paper was further improved upon~\citep{asperti18}.
The problem domain was still simplified to consider only reaching the exit stairs.
While the previous implementation employed a DQN, the improved agent used the A3C algorithm as its base. This A3C implementation was \emph{partitioned}, meaning the sample space is divided into a set of situations.
This allows the agents running simultaneously to learn from different situations and build a common cumulative reward.
- It also involves the work by~\cite{jaderberg16}.
- The A3C algorithm is first defined by~\cite{mnih15}.
+ It also builds upon the work of~\citet{jaderberg16}.
+ The A3C algorithm was first defined by~\citet{mnih15}.
It is an asynchronous algorithm that aims to optimise a policy and estimate a value function by training multiple actors in parallel.
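+ In outline, each parallel actor accumulates policy-gradient updates of the form
+ \[
+     \nabla_{\theta} \log \pi_{\theta}(a_t \mid s_t)\, A(s_t, a_t),
+     \qquad
+     A(s_t, a_t) \approx \sum_{i=0}^{k-1} \gamma^{i} r_{t+i} + \gamma^{k} V(s_{t+k}; \theta_v) - V(s_t; \theta_v),
+ \]
+ where the advantage $A$ is estimated with the learned value function $V$, and the actors asynchronously apply their accumulated gradients to a shared set of parameters.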
- \subsection{Exploring Other Roguelikes}
+ \subsection{Exploring Other Roguelikes}\label{subsec:exploring-other-roguelikes}
+
Rogue is not the only roguelike that has been explored with machine learning.
NetHack is one of the most popular games to have been explored with neural networks.
It is a roguelike game created in 1987 that is still being updated to this day, with a small but dedicated player base.
- SkillHack (\cite{matthews22}).
- NLE (\cite{kuttler20}).
- An article by~\cite{izumiya21} explores how to involve the item inventory in the neural network system of a deep reinforcement learning agent with an attention-based approach.
+ The NetHack Learning Environment (NLE)~\citep{kuttler20} exposes NetHack as a reinforcement learning environment, and SkillHack~\citep{matthews22} builds on it with tasks for learning and transferring skills.
+ An article by~\citet{izumiya21} explores how to involve the item inventory in the neural network system of a deep reinforcement learning agent with an attention-based approach.
It is attention-based in that the system calculates a score for each item in the inventory using an ``attention function''.
- \section{Concepts}
- \subsection{Reinforcement Learning Concepts}
+ \section{Concepts}\label{sec:concepts}
+
+ \subsection{Reinforcement Learning Concepts}\label{subsec:reinforcement-learning-concepts}
- \subsection{Rogue Concepts}
+ \subsection{Rogue Concepts}\label{subsec:rogue-concepts}
Rogue is a 1980 role-playing computer game inspired by text-based adventure games and tabletop role-playing games such as Dungeons and Dragons. It led to the creation of ``roguelikes'': games based on the core gameplay of Rogue.
Roguelike games are mainly characterised by challenging, turn-based hack-and-slash gameplay, procedurally generated levels and permanent character death.
- \subsubsection{Objective}
+ \subsubsection{Objective}\label{subsubsec:objective}
In Rogue, the player's objective is to descend the Dungeons of Doom, slaying monsters and collecting gold coins, in order to retrieve the Amulet of Yendor and escape the dungeon with it alive.
- The game is turn based, which means the player can spend as long as they want thinking their next move before the game processes the environment. Figure \ref{rogsc} depicts an example screenshot of the game.
+ The game is turn-based, which means the player can spend as long as they want deciding on their next move before the game processes it.
+ Figure~\ref{fig:rogsc} depicts an example screenshot of the game.
\begin{figure}[t]
\caption{A screenshot of an example Rogue game.}
\centering
\includegraphics[scale=0.5]{rogue_screenshot}
- \label{rogsc}
+ \label{fig:rogsc}
\end{figure}
- \subsubsection{Environment}
+
+ \subsubsection{Environment}\label{subsubsec:environment}
+
Every floor of the dungeon is a randomly generated level consisting of several rooms connected with corridors.
Rooms may be generated empty, or populated with several items or enemies.
When the player starts a new run, they are placed on dungeon level 1 with some food, a mace, basic armour, a bow and arrows.
@@ 160,17 187,22 @@
The dungeon configuration is initially obscured to the player, revealing itself as the player moves around.
In addition, enemies on the map will only be shown to the player if the enemy is within the player character's line of sight.
- The game tracks several things that are always shown to the player:
+ The game tracks several stats that are always shown to the player:
\begin{itemize}
\item \textbf{Level} denotes the current dungeon level.
- \item \textbf{HP} (Hit Points) represents how much damage the player can take before death. The number in brackets is the player's maximum HP.
- \item \textbf{Str} (Strength) represents how strong the player is. The number in brackets is the player's maximum strength.
+ \item \textbf{HP} (Hit Points) represents how much damage the player can take before death.
+ The number in brackets is the player's maximum HP.
+ \item \textbf{Str} (Strength) represents how strong the player is.
+ The number in brackets is the player's maximum strength.
\item \textbf{Gold} is how many gold coins the player has collected.
- \item \textbf{Arm} (Armour) is the player's current armour rating. Higher is better.
- \item \textbf{Exp} shows the player's experience level and total experience points. When the player earns enough experience points, the player's experience level increases, increasing the player's maximum HP.
+ \item \textbf{Arm} (Armour) is the player's current armour rating.
+ Higher is better.
+ \item \textbf{Exp} shows the player's experience level and total experience points.
+ When the player earns enough experience points, the player's experience level increases, increasing the player's maximum HP.
\end{itemize}
- \subsubsection{Items}
+ \subsubsection{Items}\label{subsubsec:items}
+
There are a wide variety of items the player can use, such as potions, scrolls, weapons and armour.
Some items need to be identified before the player knows what they do.
This can either be done by using a scroll of identify, or by blindly using or wearing the item, which may be risky.
@@ 178,26 210,30 @@
Cursed rings may not be removed once equipped, and they reduce the player's stats.
Curses can be removed with the scroll of remove curse.
- \subsubsection{Combat}
+ \subsubsection{Combat}\label{subsubsec:combat}
As the player navigates around the dungeon, they will encounter enemies of increasing difficulty.
Enemies in the game will try to harm the player by attacking and reducing the player's HP\@.
- If the player's HP reaches 0, the player loses the game.
The player can attack enemies by moving into them.
- This will make the player hit the enemy with their equipped weapon.
- Each weapon in Rogue deals a different amount of damage, so it is important to find and equip stronger weapons.
+ This will make the player automatically hit the enemy with their equipped weapon.
+ Each weapon in Rogue deals a different amount of damage, so it is important to find stronger weapons.
If the player defeats an enemy, they are granted ``experience points''.
- When the player earns enough experience points to increase their player level, their HP and Strength increases, making them stronger.
-
+ When the player earns enough experience points to increase their player level, their HP increases, making them tougher.
- % this can be improved Character permanent death provides a very interesting situation in the game that isn't present in many other games where you can save your progress and load previous save games. \emph{Michael Toy}, Rogue's co-creator, touched on the topic of permadeath in Roguelike Celebration 2016 in \cite{gamasutra16} by saying `We were trying to make it more immersive by making things matter ... ``this thing matters, so I'm going to think about this.'''. Every decision you make has weight to it, as the player is unable to undo any mistakes they make or adverse situations they end up in, so the player is inclined to think their actions through, providing a sense of tension in the game that would otherwise be absent.
+ If the player's HP reaches 0, the player dies.
+ The game then presents a scoreboard of the top ten runs, each entry containing the name of the player, their score and their fate.
+ The player must then start the game from the beginning, generating a new dungeon to explore.
+ Unlike many other role-playing games of the time, Rogue uses permanent character death as a mechanic, providing the player with
+ the unique challenge of surviving until the end, as the player cannot load a previous save if they are defeated.
+ This makes the player think through their future moves much more rigorously, as their decisions carry real weight.
+ \emph{Michael Toy}, Rogue's co-creator, touched on the topic of permanent death at Roguelike Celebration 2016~\citep{gamasutra16}, saying `We were trying to make it more immersive by making things matter \ldots ``this thing matters, so I'm going to think about this.'' '.
- % Rogue is a partially observable Markov Decision Process. To deal with this, we use a Long Short-term Memory system, an extension of a feedforward neural network, to process the sequence of observations. This is because LSTMs are capable of ``remembering'' information for longer periods of time. The LSTM algorithm was first defined by \cite{hochreiter97} and popularised much later, one example of an agent implementing a LSTM including AlphaStar by \cite{vinyals19}.
+ % Rogue is a partially observable Markov Decision Process. To deal with this, we use a Long Short-term Memory system, an extension of a feedforward neural network, to process the sequence of observations. This is because LSTMs are capable of ``remembering'' information for longer periods of time. The LSTM algorithm was first defined by \citet{hochreiter97} and popularised much later, one example of an agent implementing a LSTM including AlphaStar by \citet{vinyals19}.
- % The goal of chizuru4rogue is to improve upon the work of \cite{asperti18} by introducing enemies and items into the Rogue world for the agent to deal with. The agent will endeavour to maximise the final score that it gets within one run. A run's final score is used as the reward for the reinforcement learning methods within the agent. A run's final score is determined by how much gold a player collects. The deeper a player ventures in the dungeon, the more gold they can collect. Additionally, the player gains a large score bonus if the game ends while the player possesses the Amulet of Yendor, an item found in dungeon level 26.
+ % The goal of chizuru4rogue is to improve upon the work of \citet{asperti18} by introducing enemies and items into the Rogue world for the agent to deal with. The agent will endeavour to maximise the final score that it gets within one run. A run's final score is used as the reward for the reinforcement learning methods within the agent. A run's final score is determined by how much gold a player collects. The deeper a player ventures in the dungeon, the more gold they can collect. Additionally, the player gains a large score bonus if the game ends while the player possesses the Amulet of Yendor, an item found in dungeon level 26.
% We use a combination of supervised learning and self-play. During the supervised learning portion of the learning process, we provide replays of Rog-o-Matic completing the game. During the self-play portion of the learning process, chizuru4rogue will play thousands of runs interfacing with Rogueinabox to receive game state and send actions.
@@ 205,10 241,12 @@
- \section{Network Architecture}
+ \section{Network Architecture}\label{sec:network-architecture}
+
The objective of Chizuru's neural network is to take the observed dungeon map, player status, recent message and inventory as inputs and output the action that maximises the expected reward, as if it were maximising an action-value function.
- \subsection{State}
+ \subsection{State}\label{subsec:state}
+
We use the following information to represent game state:
\begin{itemize}
\item The player's status - HP, strength, EXP and other attributes.
@@ 218,7 256,8 @@
\item The items the player is equipped with
\end{itemize}
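+ As an illustration, each of these observations can be fed to the network through its own Keras input layer. The sketch below uses the input names from our model code; the shapes shown are illustrative assumptions rather than the final architecture:
+ \begin{lstlisting}
+ # Sketch only: one Keras input per component of the observed game state.
+ # The shapes below are illustrative assumptions.
+ import tensorflow as tf
+
+ status_input = tf.keras.Input(shape=(10,))     # HP, Str, Gold, Arm, Exp, ...
+ inv_input    = tf.keras.Input(shape=(26,))     # one slot per inventory letter
+ equip_input  = tf.keras.Input(shape=(4,))      # weapon, armour, rings worn
+ map_input    = tf.keras.Input(shape=(21, 79))  # visible dungeon map (ASCII codes)
+ crop_input   = tf.keras.Input(shape=(9, 9))    # window of the map around the player
+ \end{lstlisting}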
- \subsection{Action}
+ \subsection{Action}\label{subsec:action}
+
Every action in Rogue is available to the player from the start of the game.
Actions can be divided into basic actions and actions that utilise an inventory item, depending on the action and the item type.
For example,
@@ 230,20 269,17 @@
The player may input \texttt{*} in order to see what legal items they may choose and their corresponding key.
Additionally, the player may see the item-to-key mapping at any other point during the game by viewing their inventory with the \texttt{i} key.
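+ To make this concrete, the basic actions can be encoded as a small discrete set that the network chooses from, with each index mapped to the keystroke sent to the game. The sketch below is illustrative only; the \texttt{ACTIONS} mapping and the choice of actions are assumptions rather than the final design:
+ \begin{lstlisting}
+ # Illustrative sketch: a minimal discrete action set for the agent.
+ # The network outputs an index into ACTIONS; the chosen key is sent to Rogue.
+ ACTIONS = {
+     0: "h",  # move left
+     1: "j",  # move down
+     2: "k",  # move up
+     3: "l",  # move right
+     4: ">",  # descend stairs
+     5: "s",  # search for hidden doors and traps
+     6: ".",  # rest for one turn
+ }
+ \end{lstlisting}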
- \subsection{Policy Optimisation}
- Our goal was to find an optimal policy that maximises the chance that the agent can successfully reach the 26th dungeon level and get the Amulet of Yendor.
- \subsection{State Representation}
- The agent will use Rogueinabox to interface with the game. Rogueinabox is a program
- \subsection{Reward Representation}
- \subsection{Neural Network}
+ \subsection{Policy Optimisation}\label{subsec:policy-optimisation}
+
+ \subsection{Neural Network}\label{subsec:neural-network}
- \subsection{Hyperparameters}
+ \section{Implementation}\label{sec:implementation}
+
+ \subsection{Language}\label{subsec:language}
- \section{Implementation}
- \subsection{Language}
The agent will be implemented in Python.
Python is one of the most popular languages used to model neural networks due to the large number of artificial intelligence libraries available for it.
The main library we will be using is TensorFlow with Keras.
@@ 251,20 287,24 @@
Keras is a wrapper for TensorFlow that streamlines the creation of machine learning models in Python by providing the programmer with high-level building blocks.
- \section{Agent Training and Investigation}
- \subsection{Evaluation}
+ \section{Agent Training and Investigation}\label{sec:agent-training-and-investigation}
+
+ \subsection{Evaluation}\label{subsec:evaluation}
+
During training, we measured the agent's performance after every run using the following criteria:
\begin{itemize}
\item The final score the agent achieved
\item The deepest dungeon level the agent entered
\end{itemize}
- \section{Conclusion and Future work}
- chizuru-rogue achieves its goal on being an improvement to~\cite{asperti18}'s simple navigation by being able to use items in game and fight monsters.
+ \section{Conclusion and Future work}\label{sec:conclusion-and-future-work}
+
+ chizuru-rogue achieves its goal of improving upon the simple navigation of~\citet{asperti18} by being able to use items in-game and fight monsters.
For future developments of the model, we plan to use *** to *** because ***
- \section{Reflection}
+ \section{Reflection}\label{sec:reflection}
+
% Write some bollocks on how RL works well on video games and how this can lead to real-world developments with this technology.
\medskip
@@ 276,6 316,10 @@
\appendix
\section{Methods}
+ \subsection{Neural Network}\label{subsec:appendix-neural-network}
+ \subsection{State Representation}\label{subsec:appendix-state-representation}
+ \subsection{Reward Representation}\label{subsec:appendix-reward-representation}
+ \subsection{Hyperparameters}\label{subsec:appendix-hyperparameters}
\section{Results}
\section{Data}