~ntietz/isabella-db

56cbc75500da327fe18ea70669317ca690f316e0 — Nicole Tietz-Sokolskaya 2 years ago b455f52
Implement GameResultIndex to allow finding games which have a particular result
M Cargo.toml => Cargo.toml +6 -1
@@ 10,7 10,12 @@ thiserror = "1.0.37"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt", "std"] }

[profile.lto-release]
inherits = "release"
lto = "fat"
opt-level = 3

[profile.release]
debug = true
lto = "fat"
lto = "thin"
opt-level = 3

M isabella/src/bin/idb.rs => isabella/src/bin/idb.rs +14 -14
@@ 3,7 3,7 @@ use std::io::{Read, Write};

use clap::{Parser, Subcommand};
use isabella_db::db::GameDB;
use isabella_db::index::PositionIndex;
use isabella_db::index::{save::save_all, GameResultIndex, PositionIndex};
use pgn::load::PgnFile;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::EnvFilter;


@@ 20,7 20,7 @@ struct Args {
#[derive(Subcommand, Debug)]
enum Commands {
    Convert { pgn_filename: String },
    Index { index_filename: String },
    Index,
}

fn main() {


@@ 45,27 45,27 @@ fn main() {
                .expect("should open file to write");
            outfile.write_all(&buf).expect("should write to file");
        }
        Commands::Index { index_filename } => {
            let mut file: File = File::open(args.gamedb_filename).expect("should open the file");
        Commands::Index => {
            let mut file: File = File::open(&args.gamedb_filename).expect("should open the file");
            let mut buf: Vec<u8> = Vec::new();
            file.read_to_end(&mut buf).expect("should read");

            let db: GameDB = bincode::deserialize(&buf).expect("deserializing should work");
            println!("loaded {} games", db.len());
            tracing::info!("loaded {} games", db.len());
            drop(buf);
            drop(file);
            println!("dropped buf and file");
            tracing::info!("dropped buf and file");

            let index = PositionIndex::load(&db);
            let results_index = GameResultIndex::construct(&db);
            let position_index = PositionIndex::construct(&db);

            let buf = bincode::serialize(&index).expect("serializing should work");
            save_all(&args.gamedb_filename, &position_index, &results_index);
            tracing::info!("finished saving indexes");

            let mut outfile: File = File::options()
                .write(true)
                .create(true)
                .open(index_filename)
                .expect("should open file to write");
            outfile.write_all(&buf).expect("should write to file");
            // To speed up exit times, we just forget about the memory without dropping it.
            std::mem::forget(db);
            std::mem::forget(results_index);
            std::mem::forget(position_index);
        }
    };
}

M isabella/src/db/mod.rs => isabella/src/db/mod.rs +4 -0
@@ 36,6 36,10 @@ impl GameDB {
        &self.games
    }

    /// Returns a shared reference to the strings table held by this database.
    pub fn strings(&self) -> &StringsTable {
        &self.strings
    }

    /// Returns the number of games stored in this database.
    pub fn len(&self) -> usize {
        self.games.len()
    }

M isabella/src/game/mod.rs => isabella/src/game/mod.rs +19 -0
@@ 14,6 14,25 @@ pub enum StartingPosition {
    Custom(#[serde(with = "chess_serde")] Chess),
}

/// The outcome of a game, as classified from its result string.
///
/// See the `From<&str>` implementation for the exact string-to-variant mapping.
#[derive(Debug, Eq, PartialEq, Hash, Serialize, Deserialize)]
pub enum GameResult {
    /// The result string was `1-0`.
    WhiteWon,
    /// The result string was `0-1`.
    BlackWon,
    /// The result string was `1/2-1/2`.
    Drawn,
    /// The result string was anything else.
    Other,
}

impl From<&str> for GameResult {
    fn from(value: &str) -> Self {
        match value {
            "1-0" => Self::WhiteWon,
            "0-1" => Self::BlackWon,
            "1/2-1/2" => Self::Drawn,
            _ => Self::Other,
        }
    }
}

#[derive(Default, Debug, Serialize, Deserialize)]
pub struct Game {
    pub starting_position: StartingPosition,

A isabella/src/index/game_result.rs => isabella/src/index/game_result.rs +62 -0
@@ 0,0 1,62 @@
use bitmap::SparseBitmap;
use serde::{Deserialize, Serialize};

use crate::{db::GameDB, game::GameResult};

/// GameResultIndex maps from a particular result (1-0, 0-1, 1/2-1/2, *) to
/// the games which concluded with that result.
///
/// Each field is a bitmap over game positions: `construct` sets the bit at a
/// game's position in the database's game list in the bitmap matching that
/// game's "Result" tag. Games without a "Result" tag are not set in any bitmap.
#[derive(Debug, Serialize, Deserialize)]
pub struct GameResultIndex {
    /// Games whose result is `1-0`.
    pub white_won: SparseBitmap,
    /// Games whose result is `0-1`.
    pub black_won: SparseBitmap,
    /// Games whose result is `1/2-1/2`.
    pub drawn: SparseBitmap,
    /// Games whose result is any other string (for example `*`).
    pub other: SparseBitmap,
}

impl GameResultIndex {
    /// Creates an index whose four bitmaps are each built with
    /// `SparseBitmap::of_size(size)` and have no games recorded yet.
    pub fn of_size(size: usize) -> GameResultIndex {
        let white_won = SparseBitmap::of_size(size);
        let black_won = SparseBitmap::of_size(size);
        let drawn = SparseBitmap::of_size(size);
        let other = SparseBitmap::of_size(size);

        Self {
            white_won,
            black_won,
            drawn,
            other,
        }
    }

    /// Builds the result index for every game in `db`.
    ///
    /// For each game that carries a "Result" tag, the bit at the game's
    /// position in `db.games()` is set in the bitmap matching the tag's value.
    /// Games without the tag are left out of every bitmap. If the string
    /// "Result" is not present in the strings table at all, the index is
    /// returned with nothing set.
    ///
    /// # Panics
    ///
    /// Panics if a result tag's value id cannot be resolved in the strings
    /// table, or if setting a bit in a bitmap fails.
    pub fn construct(db: &GameDB) -> GameResultIndex {
        let mut index = Self::of_size(db.len());
        let strings = db.strings();

        // Without an interned "Result" key no game can carry the tag, so
        // there is nothing to scan for.
        let result_string_id = if let Some(id) = strings.id_of("Result") {
            id
        } else {
            tracing::info!("no games exist with tag \"Result\"");
            return index;
        };

        for (idx, game) in db.games().iter().enumerate() {
            let result_tag = game.tags.iter().find(|(key, _)| *key == result_string_id);

            if let Some((_, val_id)) = result_tag {
                let result = GameResult::from(
                    strings
                        .by_id(*val_id)
                        .expect("result should be an interned string")
                        .as_ref(),
                );

                // Pick the destination bitmap first, then record the game once.
                let bitmap = match result {
                    GameResult::WhiteWon => &mut index.white_won,
                    GameResult::BlackWon => &mut index.black_won,
                    GameResult::Drawn => &mut index.drawn,
                    GameResult::Other => &mut index.other,
                };
                bitmap
                    .set(idx)
                    .expect("inserting into result bitmap should succeed");
            }

            // Periodic progress report for large databases.
            if idx % 100_000 == 0 {
                tracing::debug!(idx, "construction in progress");
            }
        }

        index
    }
}

M isabella/src/index/mod.rs => isabella/src/index/mod.rs +3 -0
@@ 1,4 1,7 @@
pub mod game_result;
pub mod position;
pub mod save;
pub mod unique_fixed;

pub use game_result::GameResultIndex;
pub use position::PositionIndex;

M isabella/src/index/position.rs => isabella/src/index/position.rs +2 -2
@@ 20,7 20,7 @@ pub type BoardHash = u64;
pub type PositionIndex = UniqueFixedIndex<BoardHash, SparseBitmap>;

impl PositionIndex {
    pub fn load(db: &GameDB) -> PositionIndex {
    pub fn construct(db: &GameDB) -> PositionIndex {
        let init_size_guess = db.games().len() * AVG_MOVES_PER_GAME / 2;
        tracing::info!(init_size_guess, "initializing positions");



@@ 54,7 54,7 @@ impl PositionIndex {
            }

            if idx % 100_000 == 0 {
                tracing::debug!(idx, positions = positions.len(), "loading in progress");
                tracing::debug!(idx, positions = positions.len(), "construction in progress");
            }
        }


A isabella/src/index/save.rs => isabella/src/index/save.rs +39 -0
@@ 0,0 1,39 @@
use std::{fs::File, io::Write};

use super::{GameResultIndex, PositionIndex};

/// Saves the provided indexes out into files based on the passed-in filename.
///
/// The game result index is written first (to `{base_filename}.result.idx`),
/// followed by the position index (to `{base_filename}.position.idx`).
///
/// # Panics
///
/// Panics if either index fails to serialize or its file cannot be opened or
/// written; see `save_game_result_index` and `save_position_index`.
pub fn save_all(
    base_filename: &str,
    position_index: &PositionIndex,
    game_result_index: &GameResultIndex,
) {
    save_game_result_index(base_filename, game_result_index);
    save_position_index(base_filename, position_index);
}

/// Saves the position index into a file based on the provided filename.
///
/// The index is serialized with bincode and written to
/// `{base_filename}.position.idx`. If that file already exists its previous
/// contents are discarded, so a shorter index never leaves stale bytes from
/// an earlier, longer one at the end of the file.
///
/// # Panics
///
/// Panics if serialization fails or if the file cannot be created or written.
pub fn save_position_index(base_filename: &str, position_index: &PositionIndex) {
    let filename = format!("{base_filename}.position.idx");
    let buf = bincode::serialize(position_index).expect("serializing should work");

    // `File::create` opens write-only, creates the file when missing and
    // truncates it when present.
    let mut outfile: File = File::create(filename).expect("should open file to write");
    outfile.write_all(&buf).expect("should write to file");
}

/// Saves the game result index into a file based on the provided filename.
///
/// The index is serialized with bincode and written to
/// `{base_filename}.result.idx`. If that file already exists its previous
/// contents are discarded, so a shorter index never leaves stale bytes from
/// an earlier, longer one at the end of the file.
///
/// # Panics
///
/// Panics if serialization fails or if the file cannot be created or written.
pub fn save_game_result_index(base_filename: &str, game_result_index: &GameResultIndex) {
    let filename = format!("{base_filename}.result.idx");
    let buf = bincode::serialize(game_result_index).expect("serializing should work");

    // `File::create` opens write-only, creates the file when missing and
    // truncates it when present.
    let mut outfile: File = File::create(filename).expect("should open file to write");
    outfile.write_all(&buf).expect("should write to file");
}

M isabella/src/strings.rs => isabella/src/strings.rs +6 -0
@@ 69,6 69,12 @@ impl StringsTable {
        let hash = generate_hash(&s);
        self.ids.contains_key(&hash)
    }

    /// Looks up the id stored for the supplied string.
    ///
    /// The string is hashed with `generate_hash` and the id registered under
    /// that hash is returned, or `None` when the table has no entry for it.
    pub fn id_of<S: AsRef<str>>(&self, s: S) -> Option<StringID> {
        self.ids.get(&generate_hash(&s)).copied()
    }
}

fn generate_hash<S: AsRef<str>>(s: S) -> StringHash {