~ntietz/isabella-db

400808c03852d60e28f7499dfbdf1ad15054a96f — Nicole Tietz-Sokolskaya 1 year, 14 days ago f948e3a
Simplify the data structures without degrading performance

Implements: https://todo.sr.ht/~ntietz/isabella-db/15
12 files changed, 73 insertions(+), 229 deletions(-)

M .gitignore
M Cargo.lock
M Makefile
M isabella/Cargo.toml
M isabella/src/bin/idb.rs
R isabella/src/{db/mod.rs => db.rs}
R isabella/src/{game/mod.rs => game.rs}
D isabella/src/game/serde_proxy.rs
R isabella/src/{index/mod.rs => index.rs}
M isabella/src/index/position.rs
R isabella/src/{web/mod.rs => web.rs}
R pgn/src/{load/mod.rs => load.rs}
M .gitignore => .gitignore +4 -0
@@ 4,3 4,7 @@ target/
*.isa
*.isa.*
pgns/

# perf stuff
*.svg
perf.data*

M Cargo.lock => Cargo.lock +1 -0
@@ 1527,6 1527,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29"
dependencies = [
 "autocfg",
 "serde",
 "static_assertions",
 "version_check",
]

M Makefile => Makefile +7 -0
@@ 1,6 1,13 @@

build:
	cargo build --release

check:
	rustup update
	cargo build
	cargo test
	cargo fmt --check
	cargo clippy -- -Dwarnings

docs:
	cargo doc --open

M isabella/Cargo.toml => isabella/Cargo.toml +1 -1
@@ 11,7 11,7 @@ clap = { version = "4.0.18", features = ["derive"] }

bitmap = { path = "../bitmap" }
pgn = { path = "../pgn" }
smartstring = "1.0.1"
smartstring = { version = "1.0.1", features = ["serde"] }

rand = "0.8.5"


M isabella/src/bin/idb.rs => isabella/src/bin/idb.rs +14 -6
@@ 20,7 20,12 @@ struct Args {
#[derive(Subcommand, Debug)]
enum Commands {
    Index,
    Convert { pgn_directory: String },
    Convert {
        pgn_directory: String,

        #[arg(short, long, default_value_t = false)]
        index: bool,
    },
}

fn main() {


@@ 32,9 37,12 @@ fn main() {
    let args = Args::parse();

    match args.command {
        Commands::Convert { pgn_directory } => {
        Commands::Convert {
            pgn_directory,
            index,
        } => {
            // TODO: make directory, clean up if it exists already or warn?
            let output_dir = args.gamedb_filename;
            let output_dir = &args.gamedb_filename;

            let par_args: Vec<(String, String)> = read_dir(pgn_directory)
                .expect("should read directory")


@@ 47,9 55,9 @@ fn main() {
                })
                .collect();

            par_args
                .par_iter()
                .for_each(|(input_path, output_path)| load_from_file(input_path, output_path));
            par_args.par_iter().for_each(|(input_path, output_path)| {
                load_from_file(input_path, output_path, index)
            });
        }
        Commands::Index => {
            index_shards(&args.gamedb_filename);

R isabella/src/db/mod.rs => isabella/src/db.rs +15 -9
@@ 4,9 4,10 @@ use std::io::Write;

use pgn::PgnFile;
use serde::{Deserialize, Serialize};
use shakmaty::Chess;

use crate::game::{Game, GameResult, StartingPosition};
use crate::index::save::save_all;
use crate::index::{GameResultIndex, PositionIndex};
use crate::strings::StringsTable;

const TRACE_CHUNK_SIZE: usize = 500_000;


@@ 55,20 56,18 @@ impl GameDB {
    }

    pub fn sort(&mut self) {
        self.games.sort_by_key(|game| game.sort_key());
        //self.games.sort_by_key(|game| &game.moves);
        //self.games.sort_by_key(|game| game.result);
        self.games.sort();
    }

    pub fn from_pgn(f: PgnFile) -> Self {
        let mut db = GameDB::new();

        let standard_start = Chess::default();

        for record in f {
            let mut game = Game::default();
            if record.starting_position != standard_start {
                game.starting_position = StartingPosition::Custom(record.starting_position);
            if record.starting_position.is_some() {
                game.starting_position = StartingPosition::Custom {
                    fen: record.starting_position.unwrap(),
                };
            }
            game.moves = record.moves;
            for (key, value) in record.tags {


@@ 91,7 90,7 @@ impl GameDB {
    }
}

pub fn load_from_file(pgn_path: &str, output_path: &str) {
pub fn load_from_file(pgn_path: &str, output_path: &str, index: bool) {
    let file = PgnFile::new(pgn_path).expect("should open the file");
    let mut db = GameDB::from_pgn(file);
    db.sort();


@@ 107,4 106,11 @@ pub fn load_from_file(pgn_path: &str, output_path: &str) {
        .expect("should open file to write");
    outfile.write_all(&buf).expect("should write to file");
    tracing::info!("wrote database");

    if index {
        let results_index = GameResultIndex::construct(&db);
        let position_index = PositionIndex::construct(&db);
        save_all(output_path, &position_index, &results_index);
        tracing::info!(path = output_path, "finished saving indexes");
    }
}

R isabella/src/game/mod.rs => isabella/src/game.rs +7 -25
@@ 1,17 1,15 @@
use serde::{Deserialize, Serialize};
use shakmaty::{Chess, Move};
use smartstring::alias::String;

use crate::strings::StringID;

mod serde_proxy;

use serde_proxy::{chess_serde, vec_move_def};

#[derive(Debug, Default, Serialize, Deserialize)]
#[derive(Debug, Default, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub enum StartingPosition {
    #[default]
    Standard,
    Custom(#[serde(with = "chess_serde")] Chess),
    Custom {
        fen: String,
    },
}

#[derive(


@@ 36,26 34,10 @@ impl From<&str> for GameResult {
    }
}

#[derive(Default, Debug, Serialize, Deserialize)]
#[derive(Default, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct Game {
    pub starting_position: StartingPosition,
    #[serde(default, with = "vec_move_def")]
    pub moves: Vec<Move>,
    pub moves: Vec<String>,
    pub tags: Vec<(StringID, StringID)>,
    pub result: GameResult,
}

/// Ordering key for a game, produced by `Game::sort_key`.
///
/// The derived `Ord`/`PartialOrd` compare fields in declaration order, so
/// keys are ordered by `result` first and by `moves` second.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct GameSortKey {
    /// Result of the game; the primary sort criterion.
    pub result: GameResult,
    /// Textual form of the leading moves of the game; the secondary criterion.
    pub moves: Vec<String>,
}

impl Game {
    /// Builds the key this game is ordered by: its result together with the
    /// string form of at most its first five moves.
    pub fn sort_key(&self) -> GameSortKey {
        GameSortKey {
            result: self.result,
            moves: self
                .moves
                .iter()
                .take(5)
                .map(ToString::to_string)
                .collect(),
        }
    }
}

D isabella/src/game/serde_proxy.rs => isabella/src/game/serde_proxy.rs +0 -166
@@ 1,166 0,0 @@
use serde::{Deserialize, Serialize};
use shakmaty::{Move, Role, Square};

/// Serde "remote derive" mirror of `shakmaty::Role`.
///
/// Because of `#[serde(remote = ...)]`, the derives generate serialization
/// code for `shakmaty::Role` itself; fields of that type opt in with
/// `#[serde(with = "RoleDef")]`.
// NOTE(review): the explicit discriminants presumably match shakmaty's own
// numbering of roles — confirm against the shakmaty version in use.
#[derive(Serialize, Deserialize)]
#[serde(remote = "shakmaty::Role")]
pub enum RoleDef {
    Pawn = 1,
    Knight = 2,
    Bishop = 3,
    Rook = 4,
    Queen = 5,
    King = 6,
}

/// `#[serde(with = "opt_role_def")]` adapter for `Option<Role>` fields.
///
/// `RoleDef` only knows how to handle a bare `Role`, so each direction wraps
/// the role in a private newtype that routes through `RoleDef`.
pub mod opt_role_def {
    use serde::{Deserialize, Deserializer, Serialize, Serializer};

    use super::{Role, RoleDef};

    /// Serializes `Some(role)` through `RoleDef` and `None` as a serde "none".
    pub fn serialize<S>(value: &Option<Role>, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        #[derive(Serialize)]
        struct Helper<'a>(#[serde(with = "RoleDef")] &'a Role);

        match value {
            Some(role) => serializer.serialize_some(&Helper(role)),
            None => serializer.serialize_none(),
        }
    }

    /// Deserializes an optional role, unwrapping the private newtype again.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Role>, D::Error>
    where
        D: Deserializer<'de>,
    {
        #[derive(Deserialize)]
        struct Helper(#[serde(with = "RoleDef")] Role);

        Option::<Helper>::deserialize(deserializer)
            .map(|wrapped| wrapped.map(|Helper(role)| role))
    }
}

/// Serde "remote derive" mirror of `shakmaty::Square`.
///
/// Variants are listed rank by rank (A1..H1, then A2..H2, and so on up to H8)
/// starting from discriminant 0; `#[rustfmt::skip]` keeps that 8-per-row
/// board layout intact.
#[rustfmt::skip]
#[derive(Serialize, Deserialize)]
#[serde(remote = "shakmaty::Square")]
#[repr(u8)]
pub enum SquareDef {
    A1 = 0, B1, C1, D1, E1, F1, G1, H1,
    A2, B2, C2, D2, E2, F2, G2, H2,
    A3, B3, C3, D3, E3, F3, G3, H3,
    A4, B4, C4, D4, E4, F4, G4, H4,
    A5, B5, C5, D5, E5, F5, G5, H5,
    A6, B6, C6, D6, E6, F6, G6, H6,
    A7, B7, C7, D7, E7, F7, G7, H7,
    A8, B8, C8, D8, E8, F8, G8, H8,
}

/// Serde "remote derive" mirror of `shakmaty::Move`.
///
/// Every field is itself a foreign shakmaty type, so each one is routed
/// through the matching adapter: `RoleDef` for roles, `SquareDef` for
/// squares, and `opt_role_def` for optional roles.
#[derive(Serialize, Deserialize)]
#[serde(remote = "shakmaty::Move")]
enum MoveDef {
    /// A regular piece move, optionally capturing and/or promoting.
    Normal {
        #[serde(with = "RoleDef")]
        role: Role,
        #[serde(with = "SquareDef")]
        from: Square,
        // `default` lets a missing value deserialize as `None`.
        #[serde(default, with = "opt_role_def")]
        capture: Option<Role>,
        #[serde(with = "SquareDef")]
        to: Square,
        #[serde(default, with = "opt_role_def")]
        promotion: Option<Role>,
    },
    /// An en passant capture.
    EnPassant {
        #[serde(with = "SquareDef")]
        from: Square,
        #[serde(with = "SquareDef")]
        to: Square,
    },
    /// Castling, identified by the king and rook squares.
    Castle {
        #[serde(with = "SquareDef")]
        king: Square,
        #[serde(with = "SquareDef")]
        rook: Square,
    },
    /// Placing a piece of the given role on a square.
    Put {
        #[serde(with = "RoleDef")]
        role: Role,
        #[serde(with = "SquareDef")]
        to: Square,
    },
}

/// `#[serde(with = "vec_move_def")]` adapter for `Vec<Move>` fields.
///
/// `MoveDef` can only handle a single `Move`, so each element is wrapped in a
/// private newtype that routes through `MoveDef`.
pub mod vec_move_def {
    use serde::{Deserialize, Deserializer, Serialize, Serializer};

    use super::{Move, MoveDef};

    /// Serializes the moves as a sequence, each element encoded via `MoveDef`.
    pub fn serialize<S>(value: &[Move], serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        #[derive(Serialize)]
        struct Helper<'a>(#[serde(with = "MoveDef")] &'a Move);

        // Stream the wrappers straight into the serializer instead of first
        // collecting them into a temporary Vec. The mapped slice iterator has
        // an exact size hint, so the sequence length is still known up front.
        serializer.collect_seq(value.iter().map(Helper))
    }

    /// Deserializes a sequence of moves, each element decoded via `MoveDef`.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<Vec<Move>, D::Error>
    where
        D: Deserializer<'de>,
    {
        #[derive(Deserialize)]
        struct Helper(#[serde(with = "MoveDef")] Move);

        let helpers = Vec::<Helper>::deserialize(deserializer)?;
        // The helpers are owned here, so move each `Move` out rather than
        // cloning it from a borrowed iterator.
        Ok(helpers.into_iter().map(|Helper(m)| m).collect())
    }
}

/// `#[serde(with = "chess_serde")]` adapter that stores a `Chess` position as
/// its FEN string.
pub mod chess_serde {
    use serde::{de::Visitor, Deserializer, Serializer};
    use shakmaty::{fen::Fen, CastlingMode, Chess, EnPassantMode};

    /// Serializes the position as a FEN string.
    pub fn serialize<S>(value: &Chess, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let rendered = Fen::from_position(value.clone(), EnPassantMode::Always).to_string();
        serializer.serialize_str(&rendered)
    }

    /// Visitor that turns a FEN string back into a `Chess` position.
    struct FenVisitor;
    impl<'de> Visitor<'de> for FenVisitor {
        type Value = Chess;

        fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            f.write_str("a valid FEN string")
        }

        /// Parses the string as FEN, then builds a position from it using
        /// standard castling rules. Either failure is reported as a custom
        /// serde error carrying the `Debug` rendering of the cause.
        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            let fen = value
                .parse::<Fen>()
                .map_err(|e| E::custom(format!("{e:?}")))?;

            fen.into_position::<Chess>(CastlingMode::Standard)
                .map_err(|e| E::custom(format!("{e:?}")))
        }
    }

    /// Deserializes a position from a FEN string.
    pub fn deserialize<'de, D>(deserializer: D) -> Result<Chess, D::Error>
    where
        D: Deserializer<'de>,
    {
        deserializer.deserialize_str(FenVisitor)
    }
}

R isabella/src/index/mod.rs => isabella/src/index.rs +0 -0
M isabella/src/index/position.rs => isabella/src/index/position.rs +13 -3
@@ 1,6 1,9 @@
use std::collections::HashMap;

use bitmap::SparseBitmap;
use shakmaty::fen::Fen;
use shakmaty::san::SanPlus;
use shakmaty::CastlingMode;
use shakmaty::{zobrist::ZobristHash, Chess, Position};

use super::unique_fixed::UniqueFixedIndex;


@@ 32,7 35,11 @@ impl PositionIndex {
        for (idx, game) in db.games().iter().enumerate() {
            let mut pos = match &game.starting_position {
                StartingPosition::Standard => Chess::default(),
                StartingPosition::Custom(pos) => pos.clone(),
                StartingPosition::Custom { fen: fen_string } => {
                    let fen: Fen = fen_string.parse().expect("FEN positions should be valid");
                    fen.into_position(CastlingMode::Standard)
                        .expect("FEN position should convert to a position")
                }
            };

            let zhash = pos.zobrist_hash();


@@ 42,8 49,11 @@ impl PositionIndex {
            bmap.set(idx)
                .expect("position {idx} should be able to be set");

            for m in game.moves.iter() {
                pos.play_unchecked(m);
            for san_str in game.moves.iter() {
                let san: SanPlus = san_str.parse().expect("SAN should be valid");
                let m = san.san.to_move(&pos).expect("move should be valid");

                pos.play_unchecked(&m);

                let zhash = pos.zobrist_hash();
                let bmap = positions

R isabella/src/web/mod.rs => isabella/src/web.rs +0 -0
R pgn/src/load/mod.rs => pgn/src/load.rs +11 -19
@@ 1,13 1,13 @@
use std::fmt::Write;
use std::{fs::File, path::Path};

use pgn_reader::{BufferedReader, RawHeader, SanPlus, Skip, Visitor};
use shakmaty::{fen::Fen, CastlingMode, Chess, Move, Position};
use smartstring::alias::String;

#[derive(Debug, Default)]
pub struct PgnRecord {
    pub starting_position: Chess,
    pub ending_position: Chess,
    pub moves: Vec<Move>,
    pub starting_position: Option<String>,
    pub moves: Vec<String>,
    pub tags: Vec<(String, String)>,
}



@@ 21,19 21,13 @@ impl Visitor for Loader {

    fn header(&mut self, key: &[u8], value: RawHeader<'_>) {
        if key == b"FEN" {
            let pos: Option<Chess> = Fen::from_ascii(value.as_bytes())
                .ok()
                .and_then(|f| f.into_position(CastlingMode::Standard).ok());

            if let Some(pos) = pos {
                self.game_record.starting_position = pos.clone();
                self.game_record.ending_position = pos;
            }
            let fen = value.decode_utf8_lossy();
            self.game_record.starting_position = Some(fen.clone().into());
        }

        self.game_record.tags.push((
            String::from_utf8_lossy(key).to_string(),
            String::from_utf8_lossy(value.as_bytes()).to_string(),
            std::string::String::from_utf8_lossy(key).into(),
            std::string::String::from_utf8_lossy(value.as_bytes()).into(),
        ));
    }



@@ 42,11 36,9 @@ impl Visitor for Loader {
    }

    fn san(&mut self, san_plus: SanPlus) {
        if let Ok(m) = san_plus.san.to_move(&self.game_record.ending_position) {
            // the move is already validated by parsing from SAN
            self.game_record.ending_position.play_unchecked(&m);
            self.game_record.moves.push(m);
        }
        let mut m: String = String::new();
        write!(m, "{san_plus}").expect("should format as SAN");
        self.game_record.moves.push(m);
    }

    fn end_game(&mut self) -> Self::Result {