~ntietz/isabella-db

01e17f06a9c4844c48df9168a483e5b952b1b62d — Nicholas Tietz-Sokolsky 18 days ago 4fe042d
Implement BitOr and BitAnd for SparseBitmap, and add an iterator.

This implements bitwise operations for the SparseBitmap so that we can
combine two bitmaps to get the end result of that. (Or will be used more
than And, likely, but both are good to have.)

In the process of implementing this feature, the implementation of the
bitmap was also simplified. Instead of alternating between runs of 0s
and 1s, we now record only the runs of 1s, each with its starting
position. The size should be the same (up to +/- 4 bytes per bitmap),
and this drastically simplifies the implementation of `and` and `or`.
2 files changed, 238 insertions(+), 38 deletions(-)

M bitmap/src/lib.rs
M bitmap/src/sparse.rs
M bitmap/src/lib.rs => bitmap/src/lib.rs +1 -0
@@ 1,3 1,4 @@
#![feature(let_chains)]
use thiserror::Error;

pub mod dense;

M bitmap/src/sparse.rs => bitmap/src/sparse.rs +237 -38
@@ 1,22 1,61 @@
use std::fmt::Debug;
use std::{
    fmt::Debug,
    ops::{BitAnd, BitOr},
};

use serde::{Deserialize, Serialize};

use super::{BitmapError, ItemID};

#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
/// A contiguous run of set bits, described by where it begins and how many
/// consecutive positions it covers.
pub struct Run {
    /// Position of the first bit covered by the run.
    pub start: u32,
    /// Number of consecutive positions covered, beginning at `start`.
    pub length: u32,
}

impl Run {
    /// Returns `true` when `key` is one of the positions covered by this run,
    /// i.e. `start <= key < start + length`.
    pub fn contains(&self, key: u32) -> bool {
        // Compare offsets instead of computing `start + length`, which would
        // overflow for a run that reaches the top of the `u32` range.
        key >= self.start && key - self.start < self.length
    }

    /// Returns `true` when the two runs share at least one position.
    ///
    /// An empty run (`length == 0`) covers nothing and therefore never
    /// overlaps anything.
    pub fn overlap(&self, rhs: Run) -> bool {
        self.length > 0
            && rhs.length > 0
            && (self.contains(rhs.start) || rhs.contains(self.start))
    }

    /// Returns the last position covered by this run (inclusive).
    ///
    /// An empty run has no last position; `start` is returned for it rather
    /// than underflowing.
    pub fn end(&self) -> u32 {
        self.start + self.length.saturating_sub(1)
    }

    /// Returns the run of positions covered by both `self` and `rhs`, or
    /// `None` when the two runs do not overlap.
    pub fn intersect(&self, rhs: Run) -> Option<Run> {
        if self.overlap(rhs) {
            // Both runs are non-empty here, so `end()` is a real position and
            // `end >= start` holds.
            let start = self.start.max(rhs.start);
            let end = self.end().min(rhs.end());
            let length = end - start + 1;
            Some(Run { start, length })
        } else {
            None
        }
    }

    /// Returns the single run spanning both `self` and `rhs`, or `None` when
    /// the two runs do not overlap (runs that merely touch are not merged).
    pub fn union(&self, rhs: Run) -> Option<Run> {
        if self.overlap(rhs) {
            let start = self.start.min(rhs.start);
            let end = self.end().max(rhs.end());
            let length = end - start + 1;
            Some(Run { start, length })
        } else {
            None
        }
    }
}

/// SparseBitmap stores the contiguous runs of set bits (each with its starting
/// position), rather than each bit individually. It has a fixed size assigned
/// at creation and cannot be resized.
#[derive(Clone, Serialize, Deserialize)]
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct SparseBitmap {
    // The recorded runs; `set` only ever extends or appends after the last one.
    runs: Vec<Run>,
    // Number of addressable bits; `set` rejects any key at or beyond this value.
    size: usize,
    run_length: usize,
}

impl Debug for SparseBitmap {


@@ 24,18 63,16 @@ impl Debug for SparseBitmap {
        f.debug_struct("SparseBitmap")
            .field("runs.len()", &self.runs.len())
            .field("size", &self.size)
            .field("run_length", &self.run_length)
            .field("next_settable_index", &self.next_settable_index())
            .finish()
    }
}

impl SparseBitmap {
    pub fn of_size(size: usize) -> Self {
        // initially there are no runs, so every bit is 0.
        SparseBitmap {
            size,
            run_length: 0,
            runs: vec![Run { length: 0 }],
            runs: Vec::new(),
        }
    }



@@ 43,39 80,31 @@ impl SparseBitmap {
        self.size
    }

    /// Returns the position immediately after the last recorded run, or 0 when
    /// no run has been recorded yet.
    fn next_settable_index(&self) -> u32 {
        self.runs.last().map_or(0, |run| run.start + run.length)
    }

    /// Sets the bit at the specified position to be true. Must be called in
    /// order from the lowest bit to the highest bit (for convenience of
    /// implementation).
    pub fn set(&mut self, key: ItemID) -> Result<(), BitmapError> {
        let next_settable_index = self.next_settable_index();
        if key >= self.size {
            return Err(BitmapError::OutOfBounds(key, self.size));
        } else if key + 1 == self.run_length {
            // skip attempts to set the most recent bit
        } else if key + 1 == next_settable_index as ItemID {
            // no-op to avoid an error if we repeatedly set the last bit
            return Ok(());
        } else if key < self.run_length {
        } else if key < next_settable_index as ItemID {
            return Err(BitmapError::SettingOutOfOrder);
        }

        let mut ends_with_0 = self.runs.len() % 2 == 1;
        let pad_by = key - self.run_length;

        if pad_by > 0 {
            if !ends_with_0 {
                self.runs.push(Run { length: 0 });
                ends_with_0 = true;
            }
            if let Some(run) = self.runs.last_mut() {
                run.length += pad_by as u32;
                self.run_length += pad_by;
            }
        }

        if ends_with_0 {
            self.runs.push(Run { length: 0 });
        }
        if let Some(run) = self.runs.last_mut() {
        if let Some(run) = self.runs.last_mut() && next_settable_index as ItemID == key {
            run.length += 1;
            self.run_length += 1;
        } else {
            self.runs.push(Run { start: key as u32, length: 1 });
        }

        Ok(())


@@ 86,20 115,137 @@ impl SparseBitmap {
            return None;
        }

        let mut idx: usize = 0;
        let mut bit = false;

        let key = key as u32;
        for run in &self.runs {
            idx += run.length as usize;
            if run.start > key {
                return Some(false);
            } else if run.contains(key) {
                return Some(true);
            }
        }

        Some(false)
    }

    fn add_run(&mut self, run: Run) {
        if let Some(last) = self.runs.last_mut() && last.overlap(run) {
            *last = last.union(run).unwrap();
        } else {
            self.runs.push(run);
        }
    }
}

/// Iterator over the positions of the set bits of a `SparseBitmap`, in the
/// order its runs are stored.
pub struct SparseBitmapIterator<'a> {
    // The bitmap whose runs are being walked.
    bitmap: &'a SparseBitmap,
    // Index into `bitmap.runs` of the run to load once `current` is used up.
    next_idx: usize,
    // The not-yet-yielded remainder of the run currently being walked.
    current: Run,
}
impl<'a> Iterator for SparseBitmapIterator<'a> {
    type Item = ItemID;

    fn next(&mut self) -> Option<Self::Item> {
        if self.next_idx > self.bitmap.runs.len() {
            return None;
        }

        let result = self.current.start;

        self.current.start += 1;
        self.current.length -= 1;

        if self.current.length == 0 {
            self.current = self
                .bitmap
                .runs
                .get(self.next_idx)
                .copied()
                .unwrap_or_default();
            self.next_idx += 1;
        }

        Some(result as ItemID)
    }
}

impl<'a> IntoIterator for &'a SparseBitmap {
    type Item = ItemID;
    type IntoIter = SparseBitmapIterator<'a>;

    /// Creates an iterator positioned on the first stored run; for a bitmap
    /// without runs the iterator starts on an empty placeholder run.
    fn into_iter(self) -> Self::IntoIter {
        let first_run = match self.runs.first() {
            Some(run) => *run,
            None => Default::default(),
        };

        SparseBitmapIterator {
            bitmap: self,
            // Run 0 is already loaded into `current`, so run 1 comes next.
            next_idx: 1,
            current: first_run,
        }
    }
}

            if key < idx {
                return Some(bit);
impl BitAnd for &SparseBitmap {
    type Output = SparseBitmap;

    /// Returns a bitmap containing exactly the bits set in both operands.
    ///
    /// The result is sized to the larger of the two operands. Both run lists
    /// are walked in lockstep; every pair of runs that overlaps contributes
    /// its intersection to the result.
    fn bitand(self, rhs: Self) -> Self::Output {
        let size = self.size.max(rhs.size);

        let mut result = SparseBitmap::of_size(size);

        let mut left_iter = self.runs.iter();
        let mut right_iter = rhs.runs.iter();

        let mut left_opt = left_iter.next();
        let mut right_opt = right_iter.next();

        while let (Some(left), Some(right)) = (left_opt, right_opt) {
            if let Some(run) = left.intersect(*right) {
                result.runs.push(run);
            }

            // Advance whichever run ends first. The run that reaches further
            // may still overlap later runs on the other side, so it must be
            // kept; advancing by start position would drop those overlaps.
            // Ends are exclusive and widened to u64 so they cannot overflow.
            let left_end = u64::from(left.start) + u64::from(left.length);
            let right_end = u64::from(right.start) + u64::from(right.length);

            if left_end <= right_end {
                left_opt = left_iter.next();
            } else {
                right_opt = right_iter.next();
            }
        }

        result
    }
}

impl BitOr for &SparseBitmap {
    type Output = SparseBitmap;

    /// Returns a bitmap containing every bit set in either operand.
    ///
    /// The result is sized to the larger of the two operands. Runs from both
    /// sides are fed to `add_run` in ascending order of their start position
    /// (the right-hand run goes first on a tie), so `add_run` only ever has to
    /// reconcile a new run with the most recently added one.
    fn bitor(self, rhs: Self) -> Self::Output {
        let size = self.size.max(rhs.size);

        let mut result = SparseBitmap::of_size(size);

        let mut left_iter = self.runs.iter();
        let mut right_iter = rhs.runs.iter();

        let mut left_opt = left_iter.next();
        let mut right_opt = right_iter.next();

        loop {
            match (left_opt, right_opt) {
                // Left run starts strictly earlier: take it.
                (Some(left), Some(right)) if left.start < right.start => {
                    result.add_run(*left);
                    left_opt = left_iter.next();
                }
                // Right run starts first (or ties), or the left side is done.
                (_, Some(right)) => {
                    result.add_run(*right);
                    right_opt = right_iter.next();
                }
                // Right side is done: drain what is left on the left.
                (Some(left), None) => {
                    result.add_run(*left);
                    left_opt = left_iter.next();
                }
                (None, None) => break,
            }
        }

        result
    }
}



@@ 151,4 297,57 @@ mod tests {
        assert_eq!(bitmap.get(0), Some(true));
        assert_eq!(bitmap.get(1), Some(false));
    }

    #[test]
    fn sparse_bitmap_can_be_iterated() {
        // One isolated bit followed by two consecutive bits.
        let expected_bits: Vec<ItemID> = vec![0, 3, 4];

        let mut bitmap = SparseBitmap::of_size(8);
        expected_bits
            .iter()
            .for_each(|&bit| bitmap.set(bit).expect("should set the value"));

        let mut bits: Vec<ItemID> = Vec::new();
        for bit in &bitmap {
            bits.push(bit);
        }

        assert_eq!(bits, expected_bits);
    }

    #[test]
    fn sparse_bitmap_bitwise_and() {
        let left = bitmap_from_vec(16, vec![0, 4, 5, 9, 10, 11]);
        let right = bitmap_from_vec(16, vec![4, 5, 7, 9, 10, 11, 12]);

        // Only the bits present in both inputs should survive.
        let expected = bitmap_from_vec(16, vec![4, 5, 9, 10, 11]);

        assert_eq!(&left & &right, expected);
    }

    #[test]
    fn sparse_bitmap_bitwise_or() {
        let left = bitmap_from_vec(16, vec![0, 4, 5, 9, 10, 11]);
        let right = bitmap_from_vec(16, vec![4, 5, 7, 9, 10, 11, 12]);

        // Every bit present in at least one input should be set.
        let expected = bitmap_from_vec(16, vec![0, 4, 5, 7, 9, 10, 11, 12]);

        assert_eq!(&left | &right, expected);
    }

    /// Test helper: creates a bitmap of `size` bits and sets each index in
    /// `bits`. The indexes must be **sorted**, since bits have to be set in
    /// ascending order.
    fn bitmap_from_vec(size: usize, bits: Vec<ItemID>) -> SparseBitmap {
        let mut bitmap = SparseBitmap::of_size(size);

        bits.into_iter()
            .for_each(|bit| bitmap.set(bit).expect("should set the value"));

        bitmap
    }
}