~melmon/chizuru-old

7d630901fa10c29cf258f5c6b6dfaedb9b9741b2 — Melmon 10 months ago ca28976
Arrrrgggg
M README.md => README.md +4 -4
@@ 1,8 1,8 @@
![I LOVE ROGUE](LOVEIT.png)
![Anya from SPY x FAMILY pointing at a television screen with Rogue displayed saying 'I like this show.'](LOVEIT.png)

# Chizuru
Chizuru is an AI that plays the 1980 computer game Rogue.
While this repository contains the code for the AI, it also contains the dissertation released alongside this code in `/writeup`.
While this repository contains the code for the AI, it also contains the dissertation released alongside this code in `writeup/`.

You can learn more about Rogue on the [NetHack Wiki page](https://nethackwiki.com/wiki/Rogue_(game)) about it.



@@ 12,10 12,10 @@ This thing is designed to run in a Docker container. To do that, run these:
docker build -t chizuru .
docker run
```
After that, it should be smooth sailing.
After that, it should be "smooth" sailing.

## Files
Chizuru saves its checkpoints to `czr.ckpt` and saves models to `czr.h5`.
Chizuru saves its training checkpoints to `czr-xxxx.ckpt` where `xxxx` is the epoch number.

## Bugs
Probably infinite (although countably infinite). However, the distant screaming of your PC running this model is *not* a bug. It's a feature.

M chizuru.py => chizuru.py +50 -23
@@ 1,37 1,56 @@
# This code is governed under the GNU General Public Licence v3.0.

"""This file contains the Chizuru class, a Rogue playing agent."""
import os
#   ██████╗██╗  ██╗██╗███████╗██╗   ██╗██████╗ ██╗   ██╗
#  ██╔════╝██║  ██║██║╚══███╔╝██║   ██║██╔══██╗██║   ██║
#  ██║     ███████║██║  ███╔╝ ██║   ██║██████╔╝██║   ██║
#  ██║     ██╔══██║██║ ███╔╝  ██║   ██║██╔══██╗██║   ██║
#  ╚██████╗██║  ██║██║███████╗╚██████╔╝██║  ██║╚██████╔╝
#   ╚═════╝╚═╝  ╚═╝╚═╝╚══════╝ ╚═════╝ ╚═╝  ╚═╝ ╚═════╝
#  ██████╗  ██████╗  ██████╗ ██╗   ██╗███████╗
#  ██╔══██╗██╔═══██╗██╔════╝ ██║   ██║██╔════╝
#  ██████╔╝██║   ██║██║  ███╗██║   ██║█████╗
#  ██╔══██╗██║   ██║██║   ██║██║   ██║██╔══╝
#  ██║  ██║╚██████╔╝╚██████╔╝╚██████╔╝███████╗
#  ╚═╝  ╚═╝ ╚═════╝  ╚═════╝  ╚═════╝ ╚══════╝
#
# An AI that plays Rogue.

"""This file contains everything needed to run the Chizuru AI."""

import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

ASCII_CHARNUM = 128

NUM_ITERATIONS = 20000
BATCH_SIZE = 64
LEARNING_RATE = 1e-3
LOG_INTERVAL = 200

ENVIRONMENT = "rogueinabox"
LOG_INTERVAL = 200

CKPT_PATH = "training/czr.ckpt"
CKPT_PATH = "training/czr-{epoch:04d}.ckpt"
CKPT_DIR = os.path.dirname(CKPT_PATH)

# Hyperparameters
NUM_ITERATIONS = 20000
BATCH_SIZE = 64
ALPHA = 1.0e-3
BETA1 = 0.9
BETA2 = 0.999
EPSILON = 1.0e-8
DECAY = 0.0

CKPT_CALLBACK = tf.keras.callbacks.ModelCheckpoint(
    filepath=CKPT_PATH,
    save_weights_only=True,
    verbose=1
    verbose=1,
    save_freq=5*BATCH_SIZE
)


def create_model():
    status_input = tf.keras.Input(shape=(16,))
    """Instantiates, compiles and returns the Chizuru model."""
    status_input = tf.keras.Input(shape=(64,))
    inv_input = tf.keras.Input(shape=(64,))
    equip_input = tf.keras.Input(shape=(16,))
    map_input = tf.keras.Input(shape=(21, 79))
    crop_input = tf.keras.Input(shape=(9, 9))
    equip_input = tf.keras.Input(shape=(64,))
    map_input = tf.keras.Input(shape=(21, 79), dtype=tf.int32)
    crop_input = tf.keras.Input(shape=(9, 9), dtype=tf.int32)

    status_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64)(status_input)
    status_net = tf.keras.layers.Dense(32, activation="relu")(status_net)


@@ 47,17 66,18 @@ def create_model():
    equip_net = tf.keras.layers.Dense(16, activation="relu")(equip_net)

    map_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=21 * 79)(map_input)
    map_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(21, 79))(map_net)
    map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", input_shape=(21, 79))(map_net)
    map_net = tf.keras.layers.MaxPooling2D((2, 2))(map_net)
    map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(map_net)
    map_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu")(map_net)
    map_net = tf.keras.layers.MaxPooling2D((2, 2))(map_net)
    map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(map_net)
    map_net = tf.keras.layers.Conv2D(16, (3, 3), activation="relu")(map_net)

    crop_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=9 * 9)(crop_input)
    crop_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", input_shape=(9, 9))(crop_net)
    crop_net = tf.keras.layers.MaxPooling2D((2, 2))(crop_net)
    crop_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu")(crop_net)
    crop_net = tf.keras.layers.Conv2D(16, (3, 3), activation="relu")(crop_net)

    # requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 16, 16), (None, 64, 16), (None, 16, 16), (None, 1, 16, 64), (None, 1, 1, 32)]
    collected = tf.keras.layers.Concatenate()([status_net, inv_net, equip_net, map_net, crop_net])

    # MLP after concat


@@ 84,7 104,7 @@ def create_model():
    )

    final_model.compile(
        optimizer="adam",
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
    )


@@ 92,20 112,27 @@ def create_model():
    return final_model


def get_crop(map):
def get_crop(map: list[list[int]]):  # TODO
    """Return a 9x9 crop of the given Rogue map surrounding the player.

    Not implemented yet: the argument is ignored and ``None`` is returned.
    """
    return None


def save_checkpoint(model_sv: tf.keras.Model, epoch):
    """Write the weights of ``model_sv`` to the checkpoint file for ``epoch``.

    The destination is ``CKPT_PATH`` with ``epoch`` substituted via
    ``str.format``; a confirmation line is printed once the weights are saved.
    """
    target = CKPT_PATH.format(epoch=epoch)
    model_sv.save_weights(target)
    print(f"Epoch {epoch} saved to {target}~")


def load_checkpoint(model_ld: tf.keras.Model):
def load_checkpoint(model_ld: tf.keras.Model, epoch):
    """Load the weights saved for ``epoch`` into ``model_ld``.

    The checkpoint file is ``CKPT_PATH`` with ``epoch`` substituted via
    ``str.format``, i.e. the same path that ``save_checkpoint`` writes to.
    """
    # Expand the template before loading: passing CKPT_PATH itself would look
    # for a file whose name still contains the raw "{epoch...}" placeholder,
    # which is not the file the message below reports as loaded.
    ckpt_file = CKPT_PATH.format(epoch=epoch)
    model_ld.load_weights(ckpt_file)
    print("File " + ckpt_file + " loaded to current model~")


if __name__ == "__main__":
    # Script entry point: build the model, export a diagram of it, and write
    # an initial (epoch 0) checkpoint.
    model = create_model()
    # Render the layer graph, including tensor shapes, to stuff.png.
    tf.keras.utils.plot_model(model, "stuff.png", show_shapes=True)
    save_checkpoint(model, 0)


# †昇天†

M writeup/Drescher-DGD-dissertation-2022-23.tex => writeup/Drescher-DGD-dissertation-2022-23.tex +34 -16
@@ 13,7 13,7 @@

\begin{document}
    \title{chizuru-rogue: Deep Learning for Dungeon Crawling}
    \author{Dylan G. Drescher \\[1ex] B.Sc. Computer Science - University of Bath}
    \author{Dylan G. Drescher\\[1ex]Supervisor: Dr. Jie Zhang\\[1ex]B.Sc. Computer Science - University of Bath}
    \date{2022 - 2023}
    \maketitle



@@ 31,7 31,7 @@

    \newpage

    CHIZURU-ROGUE
    \textbf{CHIZURU-ROGUE}

    submitted by Dylan G. Drescher



@@ 48,9 48,12 @@
    \newpage

    \begin{abstract}
        Video games are one of the most popular problem domains to tackle with reinforcement learning due to the interesting complexity that can arise from simple sets of rules that many games provide.
        By training reinforcement learning models on video games and proving they are effective at solving problems, they can then be repurposed for other problems such as self-driving cars and healthcare.

        In this article we introduce chizuru-rogue, which is a computer program designed to play the video game Rogue, a famous role-playing game that inspired the creation of the ``roguelike'' video game genre.
        Rogue offers a unique problem to solve, requiring a player to solve a partially observable, randomly generated levels.
        chizuru-rouge utilises a customised neural network that involves an LSTM to explore levels in Rogue, collect gold and reach the goal.
        Rogue offers a unique problem to solve, requiring a player to solve partially observable, randomly generated levels.
        chizuru-rogue utilises a customised neural network that involves an LSTM for long-term and short-term memory to explore levels in Rogue, collect gold and reach the goal of collecting the Amulet of Yendor.

        TensorFlow will be used as a framework to implement the reinforcement learning agent.
        TensorFlow is a Python library that provides tools to streamline development of deep learning models.


@@ 84,7 87,7 @@

        % \vspace{5mm}

        % This dissertation made use of Hex, the GPU Cloud in the Department of Computer Science at the University of Bath.
        % This project made use of Hex, the GPU Cloud in the Department of Computer Science at the University of Bath.
    \end{center}

    \newpage


@@ 166,7 169,7 @@
    Roguelike games are mainly characterised by challenging, turn based hack and slash gameplay, procedurally generated levels and permanent character death.

    \subsubsection{Objective}\label{subsubsec:objective}
    In Rogue, your objective is to descend the Dungeon of Doom to slay monsters, collect gold coins, retrieve the Amulet of Yendor and escape the dungeon with it alive.
    In Rogue, your main objective is to get a high score by descending the Dungeon of Doom to slay monsters, collect gold coins, retrieve the Amulet of Yendor and escape the dungeon with it alive.
    The game is turn based, which means the player can spend as long as they want thinking about their next move before the game processes the environment.
    Figure~\ref{fig:rogsc} depicts an example screenshot of the game.



@@ 179,8 182,9 @@

    \subsubsection{Environment}\label{subsubsec:environment}

    Every floor of the dungeon is a randomly generated level consisting of several rooms connected with corridors.
    Every floor of the dungeon is a randomly generated maze consisting of several rooms connected with corridors.
    Rooms sometimes generate empty, but they may also generate populated with several items or enemies.
    One of the rooms will contain the stairs that will let the player descend the dungeon, represented with the character \texttt{\%}.
    When the player starts a new run, the player is placed in dungeon level 1 with some food, a mace, basic armour, a bow and arrows.

    Rogue's environment is partially observable.


@@ 272,9 276,15 @@
    \subsection{Policy Optimisation}\label{subsec:policy-optimisation}

    \subsection{Neural Network}\label{subsec:neural-network}
    The neural network processes each input in a separate subnetwork; the subnetwork outputs are then concatenated and fed through an LSTM followed by a multilayer perceptron network to produce the final output.
    Figure~\ref{fig:netwk} visually shows the structure of the Chizuru neural network.



    \begin{figure}[t]
        \caption{The structure of the Chizuru neural network *OUTDATED.} % TODO outdated
        \centering
        \includegraphics[scale=0.5]{network_structure}
        \label{fig:netwk}
    \end{figure}

    \section{Implementation}\label{sec:implementation}



@@ 315,12 325,20 @@
    \medskip

    \appendix
    \section{Methods}
    \subsection{Neural Network}
    \subsection{State Representation}
    \subsection{Reward Representation}
    \subsection{Hyperparameters}
    \section{Results}
    \section{Data}
    \section{Methods}\label{sec:methods}

    \subsection{Neural Network}\label{subsec:neural-network2}

    \subsection{State Representation}\label{subsec:state-representation}

    \subsection{Reward Representation}\label{subsec:reward-representation}

    \subsection{Hyperparameters}\label{subsec:hyperparameters}


    \section{Results}\label{sec:results}


    \section{Data}\label{sec:data}

\end{document}

A writeup/img/network_structure.png => writeup/img/network_structure.png +0 -0