From b4c47f464ddc1b0dcf30664b2dce3a88a89d61f8 Mon Sep 17 00:00:00 2001 From: Melmon Date: Tue, 11 Apr 2023 21:51:57 +0100 Subject: [PATCH] YES! YES!! --- .gitignore | 9 +-- .vscode/ltex.dictionary.en-GB.txt | 28 -------- .vscode/ltex.disabledRules.en-GB.txt | 1 - .vscode/ltex.hiddenFalsePositives.en-GB.txt | 29 -------- .vscode/settings.json | 3 - README.md | 12 ++-- chizuru.py | 67 ++++++++++++------- writeup/Drescher-DGD-dissertation-2022-23.tex | 3 +- 8 files changed, 54 insertions(+), 98 deletions(-) delete mode 100644 .vscode/ltex.dictionary.en-GB.txt delete mode 100644 .vscode/ltex.disabledRules.en-GB.txt delete mode 100644 .vscode/ltex.hiddenFalsePositives.en-GB.txt delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 1d2aa09..13485d3 100644 --- a/.gitignore +++ b/.gitignore @@ -160,7 +160,6 @@ cython_debug/ #.idea/ .DS_Store -.ipynb_checkpoints node_modules /.bazelrc.user /.tf_configure.bazelrc @@ -218,10 +217,8 @@ docker/docker .*.swp a.out *.orig -.idea build_src .flymake* -.DS_Store docs/_build docs/_static docs/_templates @@ -238,7 +235,6 @@ Vagrantfile ## Core latex/pdflatex auxiliary files: *.aux *.lof -*.log *.lot *.fls *.out @@ -507,9 +503,6 @@ TSWLatexianTemp* # Kile *.backup -# gummi -.*.swp - # KBibTeX *~[0-9]* @@ -539,3 +532,5 @@ TSWLatexianTemp* # option is specified. Footnotes are the stored in a file with suffix Notes.bib. # Uncomment the next line to have this generated file ignored. 
#*Notes.bib + +training/ \ No newline at end of file diff --git a/.vscode/ltex.dictionary.en-GB.txt b/.vscode/ltex.dictionary.en-GB.txt deleted file mode 100644 index 38e5883..0000000 --- a/.vscode/ltex.dictionary.en-GB.txt +++ /dev/null @@ -1,28 +0,0 @@ -Roguelikes -Roguelike -chizuru4rogue -rog-o-matic -rogueinabox -drescher -nethack -angband -Yendor -NetHack -Angband -Drescher -AlphaGo -Cogmind -roguelike -LSTMs -AlphaStar -StarCraft -Mnih -Dota -Jie -roguelikes -dilligent -Cortesi -SkillHack -Str -Chizuru -AAAI diff --git a/.vscode/ltex.disabledRules.en-GB.txt b/.vscode/ltex.disabledRules.en-GB.txt deleted file mode 100644 index 92dba8a..0000000 --- a/.vscode/ltex.disabledRules.en-GB.txt +++ /dev/null @@ -1 +0,0 @@ -OXFORD_SPELLING_Z_NOT_S diff --git a/.vscode/ltex.hiddenFalsePositives.en-GB.txt b/.vscode/ltex.hiddenFalsePositives.en-GB.txt deleted file mode 100644 index be01a20..0000000 --- a/.vscode/ltex.hiddenFalsePositives.en-GB.txt +++ /dev/null @@ -1,29 +0,0 @@ -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Project Proposal Dylan G. Drescher November 2022\\E$"} -{"rule":"EN_UNPAIRED_BRACKETS","sentence":"^\\QIf the player never engages in combat with enemies, the player's “experience level\" will never increase, as killing enemies grants “experience points\".\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Q\"AlphaGo\" howpublished=\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Q\"AlphaGo\" howpublished=\"https://www.deepmind.com/research/highlighted-research/alphago\"\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue is a computer program designed to play the beloved video game Rogue.\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Dylan G. 
Drescher May 2023\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QTo accomplish this project, I will need to contact the University of Bath HPC Support Team to be granted access the University's Balena supercomputing environments.\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QTo accomplish this project, I will need to contact the HPC departmental champion, James Davenport, to be granted access the University's Balena supercomputing environments.\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QRogue is a game that belongs to a genre called “roguelikes\".\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QRogue is a computer game that belongs to a genre called “roguelikes\".\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue is a computer program designed to play the revered video game Rogue.\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Deep Dungeon Crawling Dylan G. Drescher B.Sc. Computer Science - University of Bath May 2023\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Literature, Technology and Data Survey Dylan G. Drescher November 2022\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Reinforcement Learning for Dungeon Crawling Literature, Technology and Data Survey Dylan G. Drescher December 2022\\E$"} -{"rule":"EN_COMPOUNDS","sentence":"^\\Q\"Grandmaster level in StarCraft II using multi-agent reinforcement learning\"\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Learning for Deep Dungeon Crawling Dylan G. Drescher B.Sc. Computer Science - University of Bath May 2023\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rogue: Deep Learning for Deep Dungeon Crawling Dylan G. Drescher B.Sc. 
Computer Science - University of Bath 2022 - 2023\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Qchizuru4rogue: Deep Learning for Deep Dungeoneering Dylan G. Drescher B.Sc. Computer Science - University of Bath 2022 - 2023\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\QNetHack is another roguelike that has been explored in SkillHack (\\E(?:Dummy|Ina|Jimmy-)[0-9]+\\Q)\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rouge utilises\\E$"} -{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qchizuru4rouge utilises a *** to explore levels in Rogue, collect gold and reach the goal.\\E$"} -{"rule":"MORFOLOGIK_RULE_EN_GB","sentence":"^\\Q2021 IEEE Conference on Games (CoG)\\E$"} -{"rule":"DT_PRP","sentence":"^\\QThe player may see what the mapping is by viewing their inventory with the I key.\\E$"} -{"rule":"DT_PRP","sentence":"^\\QThe player may see what the mapping is by viewing their inventory with the I key during the game.\\E$"} -{"rule":"DT_PRP","sentence":"^\\QAdditionally, the player may see what the item to keyboard mapping is by viewing their inventory with the I key during the game.\\E$"} -{"rule":"I_LOWERCASE","sentence":"^\\QAdditionally, the player may see what the item to keyboard mapping is by viewing their inventory with the i key during the game.\\E$"} -{"rule":"PHRASE_REPETITION","sentence":"^\\QWe use as input The player's status - HP, strength, EXP and other attributes The game map, a 21 x 79 array of ASCII characters A 7 x 7 crop of the map centred around the player The player's inventory The items the player is equipped with\\E$"} -{"rule":"PHRASE_REPETITION","sentence":"^\\QWe use the following information to represent game state: The player's status - HP, strength, EXP and other attributes The game map, a 21 x 79 array of ASCII characters A 7 x 7 crop of the map centred around the player The player's inventory The items the player is equipped with\\E$"} -{"rule":"I_LOWERCASE","sentence":"^\\QAdditionally, the player 
may see what the item to keyboard mapping is by viewing their inventory with the i key at any other point during the game.\\E$"} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 50a5e9d..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "editor.wordWrap": "on", -} \ No newline at end of file diff --git a/README.md b/README.md index fe83216..1019427 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Chizuru is an AI that plays the 1980 computer game Rogue. While this repository contains the code for the AI, it also contains the dissertation released alongside this code in `writeup/`. -You can learn more about Rogue on the [NetHack Wiki page](https://nethackwiki.com/wiki/Rogue_(game)) about it. +You can learn more about Rogue, its history and the way it plays on [this webpage](https://nethackwiki.com/wiki/Rogue_(game)). ## Setup This thing is designed to run in a Docker container. To do that, run these: @@ -12,16 +12,18 @@ This thing is designed to run in a Docker container. To do that, run these: docker build -t chizuru . docker run ``` -After that, it should be "smooth" sailing. +After that, it should be smooth sailing. ## Files -Chizuru saves its training checkpoints to `czr-xxxx.ckpt` where `xxxx` is the epoch number. +The model is located in `chizuru.py`. The training logic is written in `train.py`, and the code for previewing how the AI plays is located in `preview.py`. Seeing is believing, after all. + +Chizuru saves its training checkpoints to `training/czr-xxxx.ckpt` where `xxxx` is the epoch number. It saves a new checkpoint every 5 epochs. ## Bugs -Probably infinite (although countably infinite). However, the distant screams of your PC running this model is *not* a bug. It's a feature. +Probably infinite (although countably infinite). However, the distant screams of your PC and/or GPU cloud running this model are *not* a bug. It's a feature. 
## Licence -This program is released under the GNU General Public Licence v3.0. +This program is released under the GNU General Public Licence v3.0, except for the writeup, which is copyrighted. You should have received a copy of the GNU General Public Licence along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/chizuru.py b/chizuru.py index ff9ff99..b811062 100644 --- a/chizuru.py +++ b/chizuru.py @@ -1,5 +1,5 @@ # This code is governed under the GNU General Public Licence v3.0. - +# # ██████╗██╗ ██╗██╗███████╗██╗ ██╗██████╗ ██╗ ██╗ # ██╔════╝██║ ██║██║╚══███╔╝██║ ██║██╔══██╗██║ ██║ # ██║ ███████║██║ ███╔╝ ██║ ██║██████╔╝██║ ██║ @@ -13,7 +13,7 @@ # ██║ ██║╚██████╔╝╚██████╔╝╚██████╔╝███████╗ # ╚═╝ ╚═╝ ╚═════╝ ╚═════╝ ╚═════╝ ╚══════╝ # -# An AI that plays Rogue. +# All organic, free-range bits and bytes. Contains no artificial colours or flavourings. May contain bugs. """This file contains everything needed to run the Chizuru AI.""" @@ -52,47 +52,67 @@ def create_model(): map_input = tf.keras.Input(shape=(21, 79), dtype=tf.int32) crop_input = tf.keras.Input(shape=(9, 9), dtype=tf.int32) - status_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64)(status_input) + status_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32)(status_input) status_net = tf.keras.layers.Dense(32, activation="relu")(status_net) status_net = tf.keras.layers.Dense(32, activation="relu")(status_net) - status_net = tf.keras.layers.Dense(16, activation="relu")(status_net) + #status_net = tf.keras.layers.Flatten()(status_net) - inv_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64)(inv_input) + inv_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32)(inv_input) # replace this with attention layer maybe? 
inv_net = tf.keras.layers.Dense(32, activation="relu")(inv_net) - inv_net = tf.keras.layers.Dense(16, activation="relu")(inv_net) + #inv_net = tf.keras.layers.Flatten()(inv_net) equip_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 16)(equip_input) - equip_net = tf.keras.layers.Dense(32, activation="relu")(equip_net) equip_net = tf.keras.layers.Dense(16, activation="relu")(equip_net) + #equip_net = tf.keras.layers.Flatten()(equip_net) - map_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=21 * 79)(map_input) - map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", input_shape=(21, 79))(map_net) + map_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32, input_length=21 * 79)(map_input) + map_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu", input_shape=(21, 79))(map_net) map_net = tf.keras.layers.MaxPooling2D((2, 2))(map_net) - map_net = tf.keras.layers.Conv2D(32, (3, 3), activation="relu")(map_net) - map_net = tf.keras.layers.MaxPooling2D((2, 2))(map_net) - map_net = tf.keras.layers.Conv2D(16, (3, 3), activation="relu")(map_net) + map_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu")(map_net) + map_net = tf.keras.layers.Flatten()(map_net) - crop_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 64, input_length=9 * 9)(crop_input) + crop_net = tf.keras.layers.Embedding(ASCII_CHARNUM, 32, input_length=9 * 9)(crop_input) crop_net = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", input_shape=(9, 9))(crop_net) - crop_net = tf.keras.layers.MaxPooling2D((2, 2))(crop_net) - crop_net = tf.keras.layers.Conv2D(16, (3, 3), activation="relu")(crop_net) + crop_net = tf.keras.layers.Flatten()(crop_net) - # requires inputs with matching shapes except for the concatenation axis. 
Received: input_shape=[(None, 16, 16), (None, 64, 16), (None, 16, 16), (None, 1, 16, 64), (None, 1, 1, 32)] - collected = tf.keras.layers.Concatenate()([status_net, inv_net, equip_net, map_net, crop_net]) + collected = tf.keras.layers.Concatenate()([status_net, inv_net, equip_net]) + print(collected.shape) # MLP after concat - pre_mlp = tf.keras.layers.Dense(64, activation="relu")(collected) - pre_mlp = tf.keras.layers.Dense(64, activation="relu")(pre_mlp) - pre_mlp = tf.keras.layers.Dense(48, activation="relu")(pre_mlp) + pre_mlp = tf.keras.layers.Dense(256, activation="relu")(collected) + print(pre_mlp.shape) # LSTM - lstm = tf.keras.layers.LSTM(128)(pre_mlp) + lstm = tf.keras.layers.LSTM(256)(pre_mlp) + print(lstm.shape) # final MLP final_mlp = tf.keras.layers.Dense(128)(lstm) - final_mlp = tf.keras.layers.Dense(64)(final_mlp) - output = tf.keras.layers.Dense(10)(final_mlp) + output = tf.keras.layers.Dense(21)(final_mlp) + print(output.shape) + # COMMANDS + # 0 : N MOVE (k) + # 1 : E MOVE (l) + # 2 : S MOVE (j) + # 3 : W MOVE (h) + # 4 : NE MOVE (u) + # 5 : SE MOVE (n) + # 6 : SW MOVE (b) + # 7 : NW MOVE (y) + # 8 : SEARCH (s) + # 9 : WAIT (.) + # 10 : EAT* (e) + # 11 : QUAFF* (q) + # 12 : READ* (r) + # 13 : WIELD (WEAPON)* (w) + # 14 : WEAR (ARMOUR)* (W) + # 15 : TAKE OFF (ARMOUR) (T) + # 16 : PUT ON (RING)* (p) + # 17 : REMOVE (RING) (R) + # 18 : THROW+* (t) + # 19 : ZAP+* (z) + # 20 : DROP* (d) final_model = tf.keras.Model( inputs=[status_input, @@ -134,5 +154,4 @@ if __name__ == "__main__": tf.keras.utils.plot_model(model, "stuff.png", show_shapes=True) save_checkpoint(model, 0) - # †昇天† diff --git a/writeup/Drescher-DGD-dissertation-2022-23.tex b/writeup/Drescher-DGD-dissertation-2022-23.tex index 8b0111b..dee5323 100644 --- a/writeup/Drescher-DGD-dissertation-2022-23.tex +++ b/writeup/Drescher-DGD-dissertation-2022-23.tex @@ -199,8 +199,9 @@ \item \textbf{Str} (Strength) represents how strong the player is. 
The number in brackets is the player's maximum strength. \item \textbf{Gold} is how many gold coins the player has collected. + Gold increases the player's final score. \item \textbf{Arm} (Armour) is the player's current armour rating. - Higher is better. + The higher the rating, the higher the chance of avoiding attacks. \item \textbf{Exp} shows the player's experience level and total experience points. When the player earns enough experience points, the player's experience level increases, increasing the player's maximum HP. \end{itemize} -- 2.45.2