~melmon/chizuru-old

b4c47f464ddc1b0dcf30664b2dce3a88a89d61f8 — Melmon 10 months ago 7d63090
YES! YES!!
8 files changed, 54 insertions(+), 98 deletions(-)

M .gitignore
D .vscode/ltex.dictionary.en-GB.txt
D .vscode/ltex.disabledRules.en-GB.txt
D .vscode/ltex.hiddenFalsePositives.en-GB.txt
D .vscode/settings.json
M README.md
M chizuru.py
M writeup/Drescher-DGD-dissertation-2022-23.tex
M .gitignore => .gitignore +2 -7
@@ 160,7 160,6 @@ cython_debug/
#.idea/

.DS_Store
.ipynb_checkpoints
node_modules
/.bazelrc.user
/.tf_configure.bazelrc


@@ 218,10 217,8 @@ docker/docker
.*.swp
a.out
*.orig
.idea
build_src
.flymake*
.DS_Store
docs/_build
docs/_static
docs/_templates


@@ 238,7 235,6 @@ Vagrantfile
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out


@@ 507,9 503,6 @@ TSWLatexianTemp*
# Kile
*.backup

# gummi
.*.swp

# KBibTeX
*~[0-9]*



@@ 539,3 532,5 @@ TSWLatexianTemp*
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

training/
\ No newline at end of file

D .vscode/ltex.dictionary.en-GB.txt => .vscode/ltex.dictionary.en-GB.txt +0 -28
@@ 1,28 0,0 @@
Roguelikes
Roguelike
chizuru4rogue
rog-o-matic
rogueinabox
drescher
nethack
angband
Yendor
NetHack
Angband
Drescher
AlphaGo
Cogmind
roguelike
LSTMs
AlphaStar
StarCraft
Mnih
Dota
Jie
roguelikes
dilligent
Cortesi
SkillHack
Str
Chizuru
AAAI

D .vscode/ltex.disabledRules.en-GB.txt => .vscode/ltex.disabledRules.en-GB.txt +0 -1
@@ 1,1 0,0 @@
OXFORD_SPELLING_Z_NOT_S

D .vscode/ltex.hiddenFalsePositives.en-GB.txt => .vscode/ltex.hiddenFalsePositives.en-GB.txt +0 -29
@@ 1,29 0,0 @@
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Project Proposal Dylan G. Drescher November 2022\\E$"}
{"rule":"EN_UNPAIRED_BRACKETS","sentence":"^\\QIf the player never engages in combat with enemies, the player's “experience level\" will never increase, as killing enemies grants “experience points\".\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Q\"AlphaGo\" howpublished=\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Q\"AlphaGo\" howpublished=\"https://www.deepmind.com/research/highlighted-research/alphago\"\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue is a computer program designed to play the beloved video game Rogue.\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Dylan G. Drescher May 2023\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QTo accomplish this project, I will need to contact the University of Bath HPC Support Team to be granted access the University's Balena supercomputing environments.\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QTo accomplish this project, I will need to contact the HPC departmental champion, James Davenport, to be granted access the University's Balena supercomputing environments.\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QRogue is a game that belongs to a genre called “roguelikes\".\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QRogue is a computer game that belongs to a genre called “roguelikes\".\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue is a computer program designed to play the revered video game Rogue.\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Deep Dungeon Crawling Dylan G. Drescher B.Sc. Computer Science - University of Bath May 2023\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Literature, Technology and Data Survey Dylan G. Drescher November 2022\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Literature, Technology and Data Survey Dylan G. Drescher December 2022\\E$"}
{"rule":"EN_COMPOUNDS","sentence":"^\\Q\"Grandmaster level in StarCraft II using multi-agent reinforcement learning\"\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Learning for Deep Dungeon Crawling Dylan G. Drescher B.Sc. Computer Science - University of Bath May 2023\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Learning for Deep Dungeon Crawling Dylan G. Drescher B.Sc. Computer Science - University of Bath 2022 - 2023\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Qchizuru4rogue: Deep Learning for Deep Dungeoneering Dylan G. Drescher B.Sc. Computer Science - University of Bath 2022 - 2023\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QNetHack is another roguelike that has been explored in SkillHack (\\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q)\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rouge utilises\\E$"}
{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rouge utilises a *** to explore levels in Rogue, collect gold and reach the goal.\\E$"}
{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Q2021 IEEE Conference on Games (CoG)\\E$"}
{"rule":"DT_PRP","sentence":"^\\QThe player may see what the mapping is by viewing their inventory with the I key.\\E$"}
{"rule":"DT_PRP","sentence":"^\\QThe player may see what the mapping is by viewing their inventory with the I key during the game.\\E$"}
{"rule":"DT_PRP","sentence":"^\\QAdditionally, the player may see what the item to keyboard mapping is by viewing their inventory with the I key during the game.\\E$"}
{"rule":"I_LOWERCASE","sentence":"^\\QAdditionally, the player may see what the item to keyboard mapping is by viewing their inventory with the i key during the game.\\E$"}
{"rule":"PHRASE_REPETITION","sentence":"^\\QWe use as input The player's status - HP, strength, EXP and other attributes The game map, a 21 x 79 array of ASCII characters A 7 x 7 crop of the map centred around the player The player's inventory The items the player is equipped with\\E$"}
{"rule":"PHRASE_REPETITION","sentence":"^\\QWe use the following information to represent game state: The player's status - HP, strength, EXP and other attributes The game map, a 21 x 79 array of ASCII characters A 7 x 7 crop of the map centred around the player The player's inventory The items the player is equipped with\\E$"}
{"rule":"I_LOWERCASE","sentence":"^\\QAdditionally, the player may see what the item to keyboard mapping is by viewing their inventory with the i key at any other point during the game.\\E$"}

D .vscode/settings.json => .vscode/settings.json +0 -3
@@ 1,3 0,0 @@
{
    "editor.wordWrap": "on",
}
\ No newline at end of file

M README.md => README.md +7 -5
@@ 4,7 4,7 @@
Chizuru is an AI that plays the 1980 computer game Rogue.
While this repository contains the code for the AI, it also contains the dissertation released alongside this code in `writeup/`.

You can learn more about Rogue on the [NetHack Wiki page](https://nethackwiki.com/wiki/Rogue_(game)) about it.
You can learn more about Rogue, its history and the way it plays on [this webpage](https://nethackwiki.com/wiki/Rogue_(game)).

## Setup
This thing is designed to run in a Docker container. To do that, run these:


@@ 12,16 12,18 @@ This thing is designed to run in a Docker container. To do that, run these:
docker build -t chizuru .
docker run
```
After that, it should be "smooth" sailing.
After that, it should be smooth sailing.

## Files
Chizuru saves its training checkpoints to `czr-xxxx.ckpt` where `xxxx` is the epoch number.
The model is located in `chizuru.py`. The training logic is written in `train.py`, and the code for previewing how the AI plays is located in `preview.py`. Seeing is believing, after all.

Chizuru saves its training checkpoints to `training/czr-xxxx.ckpt` where `xxxx` is the epoch number. It saves a new checkpoint every 5 epochs.

## Bugs
Probably infinite (although countably infinite). However, the distant screams of your PC running this model is *not* a bug. It's a feature.
Probably infinite (although countably infinite). However, the distant screams of your PC and/or GPU cloud running this model are *not* a bug. They're a feature.

## Licence
This program is released under the GNU General Public Licence v3.0.
This program is released under the GNU General Public Licence v3.0, except for the writeup, which is copyrighted.

You should have received a copy of the GNU General Public Licence
along with this program. If not, see <https://www.gnu.org/licenses/>.

M chizuru.py => chizuru.py +43 -24
@@ 1,5 1,5 @@
# This code is governed under the GNU General Public Licence v3.0.

#
#   ██████╗██╗  ██╗██╗███████╗██╗   ██╗██████╗ ██╗   ██╗
#  ██╔════╝██║  ██║██║╚══███╔╝██║   ██║██╔══██╗██║   ██║
#  ██║     ███████║██║  ███╔╝ ██║   ██║██████╔╝██║   ██║


@@ 13,7 13,7 @@
#  ██║  ██║╚██████╔╝╚██████╔╝╚██████╔╝███████╗
#  ╚═╝  ╚═╝ ╚═════╝  ╚═════╝  ╚═════╝ ╚══════╝
#
# An AI that plays Rogue.
# All organic, free-range bits and bytes. Contains no artificial colours or flavourings. May contain bugs.

"""This file contains everything needed to run the Chizuru AI."""



@@ 52,47 52,67 @@ def create_model():
    map_input = tf.keras.Input(shape=(21, 79), dtype=tf.int32)
    crop_input = tf.keras.Input(shape=(9, 9), dtype=tf.int32)

    status_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64)(status_input)
    status_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32)(status_input)
    status_net = tf.keras.layers.Dense(32, activation="relu")(status_net)
    status_net = tf.keras.layers.Dense(32, activation="relu")(status_net)
    status_net = tf.keras.layers.Dense(16, activation="relu")(status_net)
    #status_net = tf.keras.layers.Flatten()(status_net)

    inv_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64)(inv_input)
    inv_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32)(inv_input)  # replace this with attention layer maybe?
    inv_net = tf.keras.layers.Dense(32, activation="relu")(inv_net)
    inv_net = tf.keras.layers.Dense(16, activation="relu")(inv_net)
    #inv_net = tf.keras.layers.Flatten()(inv_net)

    equip_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 16)(equip_input)
    equip_net = tf.keras.layers.Dense(32, activation="relu")(equip_net)
    equip_net = tf.keras.layers.Dense(16, activation="relu")(equip_net)
    #equip_net = tf.keras.layers.Flatten()(equip_net)

    map_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=21 * 79)(map_input)
    map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", input_shape=(21, 79))(map_net)
    map_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32, input_length=21 * 79)(map_input)
    map_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(21, 79))(map_net)
    map_net = tf.keras.layers.MaxPooling2D((2, 2))(map_net)
    map_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu")(map_net)
    map_net = tf.keras.layers.MaxPooling2D((2, 2))(map_net)
    map_net = tf.keras.layers.Conv2D(16, (3, 3), activation="relu")(map_net)
    map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(map_net)
    map_net = tf.keras.layers.Flatten()(map_net)

    crop_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=9 * 9)(crop_input)
    crop_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32, input_length=9 * 9)(crop_input)
    crop_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", input_shape=(9, 9))(crop_net)
    crop_net = tf.keras.layers.MaxPooling2D((2, 2))(crop_net)
    crop_net = tf.keras.layers.Conv2D(16, (3, 3), activation="relu")(crop_net)
    crop_net = tf.keras.layers.Flatten()(crop_net)

    # requires inputs with matching shapes except for the concatenation axis. Received: input_shape=[(None, 16, 16), (None, 64, 16), (None, 16, 16), (None, 1, 16, 64), (None, 1, 1, 32)]
    collected = tf.keras.layers.Concatenate()([status_net, inv_net, equip_net, map_net, crop_net])
    collected = tf.keras.layers.Concatenate()([status_net, inv_net, equip_net])
    print(collected.shape)

    # MLP after concat
    pre_mlp = tf.keras.layers.Dense(64, activation="relu")(collected)
    pre_mlp = tf.keras.layers.Dense(64, activation="relu")(pre_mlp)
    pre_mlp = tf.keras.layers.Dense(48, activation="relu")(pre_mlp)
    pre_mlp = tf.keras.layers.Dense(256, activation="relu")(collected)
    print(pre_mlp.shape)

    # LSTM
    lstm = tf.keras.layers.LSTM(128)(pre_mlp)
    lstm = tf.keras.layers.LSTM(256)(pre_mlp)
    print(lstm.shape)

    # final MLP
    final_mlp = tf.keras.layers.Dense(128)(lstm)
    final_mlp = tf.keras.layers.Dense(64)(final_mlp)

    output = tf.keras.layers.Dense(10)(final_mlp)
    output = tf.keras.layers.Dense(21)(final_mlp)
    print(output.shape)
    # COMMANDS
    # 0  : N MOVE (k)
    # 1  : E MOVE (l)
    # 2  : S MOVE (j)
    # 3  : W MOVE (h)
    # 4  : NE MOVE (u)
    # 5  : SE MOVE (n)
    # 6  : SW MOVE (b)
    # 7  : NW MOVE (y)
    # 8  : SEARCH (s)
    # 9  : WAIT (.)
    # 10 : EAT* (e)
    # 11 : QUAFF* (q)
    # 12 : READ* (r)
    # 13 : WIELD (WEAPON)* (w)
    # 14 : WEAR (ARMOUR)* (W)
    # 15 : TAKE OFF (ARMOUR) (T)
    # 16 : PUT ON (RING)* (p)
    # 17 : REMOVE (RING) (R)
    # 18 : THROW+* (t)
    # 19 : ZAP+* (z)
    # 20 : DROP* (d)

    final_model = tf.keras.Model(
        inputs=[status_input,


@@ 134,5 154,4 @@ if __name__ == "__main__":
    tf.keras.utils.plot_model(model, "stuff.png", show_shapes=True)
    save_checkpoint(model, 0)


# †昇天†

M writeup/Drescher-DGD-dissertation-2022-23.tex => writeup/Drescher-DGD-dissertation-2022-23.tex +2 -1
@@ 199,8 199,9 @@
        \item \textbf{Str} (Strength) represents how strong the player is.
            The number in brackets is the player's maximum strength.
        \item \textbf{Gold} is how many gold coins the player has collected.
            Gold increases the player's final score.
        \item \textbf{Arm} (Armour) is the player's current armour rating.
            Higher is better.
            The higher the rating, the higher the chance of avoiding attacks.
        \item \textbf{Exp} shows the player's experience level and total experience points.
            When the player earns enough experience points, the player's experience level increases, increasing the player's maximum HP.
    \end{itemize}