~obeancomputer/bitter

aa9fa768dc099525a6c9677a91ba37acd78fda95 — ocsmit 1 year, 1 month ago 7806827 main
Update to add variable length encoding for bit array
M src/bitarr.c => src/bitarr.c +24 -0
@@ 31,3 31,27 @@ BitArray* BitArray_init(unsigned int A[], uint32_t n, uint8_t element_size, size
    for (i = 0; i < n; ++i) BitArray_write(bit_arr, i, A[i]);
    return bit_arr;
}


unsigned int BitArray_read(BitArray* bit_arr, unsigned int i)
{
  // Indices run from 0 to n-1; anything else terminates the program.
  if (i >= bit_arr->n) {
    fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
    exit(OUT_OF_BOUNDS);
  }

  // Element i occupies the inclusive bit range [i*l, (i+1)*l - 1], where l is
  // the per-element bit count stored in element_size.
  const unsigned int first_bit = i * bit_arr->element_size;
  const unsigned int last_bit = (i + 1) * bit_arr->element_size - 1;

  return bit_read_range(bit_arr->v, bit_arr->width, first_bit, last_bit);
}

// -- Writing -----------------------------------------------------------------
void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x)
{
  // Indices run from 0 to n-1; anything else terminates the program.
  if (i >= bit_arr->n) {
    fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
    exit(OUT_OF_BOUNDS);
  }

  // Element i occupies the inclusive bit range [i*l, (i+1)*l - 1], where l is
  // the per-element bit count stored in element_size.
  const unsigned int first_bit = i * bit_arr->element_size;
  const unsigned int last_bit = (i + 1) * bit_arr->element_size - 1;

  bit_write_range(bit_arr->v, bit_arr->width, first_bit, last_bit, x);
}



M src/bitarr.h => src/bitarr.h +26 -7
@@ 64,14 64,10 @@
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include "bitops.h"
#include "common.h"


// Status codes; the non-zero ones are also used as process exit codes.
typedef enum {
  BITARR_SUCCESS = 0,
  OUT_OF_BOUNDS  = 1, // Indexing error
  FILE_ERROR     = 2  // I/O Error
} BITARR_ERROR;


/**
 * @struct BitArray


@@ 120,8 116,31 @@ void BitArray_free(BitArray *bitarr);
 * @param l         Maximum number of bits for each element in A
 * @return          pointer to BitArray
 */
BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size, 
BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size,
  size_t word_size);



/**
 * @brief Get value from original array at index i
 *
 * The array held within a BitArray is a compact version of the original.
 * We can retrieve this original value by reading the bits from the range
 * [i * l, (i+1)*l-1] in the compact array.
 *
 * @param bit_arr
 * @param i
 * @return Value at A[i]
 */
unsigned int BitArray_read(BitArray* bit_arr, unsigned int i);

/**
 * @brief Write value to compact bit representation of array
 *
 * @param bit_arr Pointer to BitArray
 * @param i       Index in array to write
 * @param x       Integer to write
 */
void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x);

#endif // BITARR_H_

A src/bitarr_vl.c => src/bitarr_vl.c +102 -0
@@ 0,0 1,102 @@
#include "bitarr_vl.h"
#include "bitops.h"
#include "encoding.h"

/**
 * @brief Release a VLBitArray and its packed word array.
 *
 * The pointer table P is a flexible array member, so it is released together
 * with the struct itself. Passing NULL is a no-op, mirroring free().
 *
 * @param bit_arr Array to release (may be NULL)
 */
void VLBitArray_free(VLBitArray *bit_arr)
{
    if (bit_arr == NULL) {
        return;
    }
    free(bit_arr->W);
    free(bit_arr);
}

VLBitArray *VLBitArray_init(unsigned int A[], size_t length, size_t k, size_t size)
{

    // bytes -> bits
    size_t size_bits = size * 8;

    // Find length of P
    int p_len = ceil_int(length, k);
    // Allocate struct and pointer vla
    VLBitArray *vlb = calloc(1, sizeof(VLBitArray) + sizeof(size_t) * p_len);

    size_t current_p_pos = 0;
    // Create array of size the length of A
    uint32_t gamma_A = 0,
             g_length = 0,
             g_offset = 0,
             A_copy[length]; // Empty array of same size

    for (size_t i = 0, j = 0; i < length; ++i) {
        // Encode value as A[i] + 1 (gamma encoding can't be zero)
        // code will be (g_offset << (g_length+1) | ((1 << g_length)))
        g_length = (uint32_t) log2(A[i] + 1);
        g_offset = (A[i] + 1) - (1 << g_length);

        // Gamma code is of size length * 2
        size_t p_increment = g_length * 2 + 1;

        // Write gamma code of A[i] to array
        bit_write_range(
            A_copy,
            size_bits,
            current_p_pos,
            current_p_pos + p_increment,
            // Encode as g_offset.g_length
            (g_offset << (g_length+1) | ((1 << g_length)))
        );

        // Assign current bit idx to pointer array
        if (i % k == 0) vlb->P[j++] = current_p_pos;
        current_p_pos += p_increment;
    }

    // Maximum number of elements of word size we need to fit total number of bits
    size_t max_idx = ceil_int(current_p_pos, size_bits);
    // Allocate array, and copy over only the needed bits from A_copy
    vlb->W = malloc((size) * max_idx);
    memcpy(vlb->W, A_copy, (size) * max_idx);

    // Set struct members
    vlb->k = k;
    vlb->length = length;
    vlb->logical_size = current_p_pos;
    vlb->physical_size = max_idx;
    vlb->element_size = size_bits;

    return vlb;
}


uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i)
{
    if (i >= bit_arr->length) {
        fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
        exit(OUT_OF_BOUNDS);
    }

    // Sampled pointer covering element i, and how many codes precede
    // element i inside that sample group.
    const size_t sample = i / bit_arr->k;
    const size_t to_skip = i - sample * bit_arr->k;

    size_t pos = bit_arr->P[sample];
    uint32_t code = 0;

    // Walk code by code from the sampled position up to and including
    // element i; the last window read holds the code to decode.
    for (size_t step = 0; step <= to_skip; ++step) {
        // Window of up to element_size - 1 bits, clamped to the encoded data.
        size_t window_end = pos + bit_arr->element_size - 1;
        if (window_end > bit_arr->logical_size) {
            window_end = bit_arr->logical_size;
        }

        code = bit_read_range(
            bit_arr->W,
            bit_arr->element_size,
            pos,
            window_end - 1
        );

        // Lowest set bit at position l means the code is 2 * l + 1 bits long.
        pos += 1 + find_LSB(code) * 2;
    }

    // Values were stored as A[i] + 1.
    return (gamma_decode(code) - 1);
}


A src/bitarr_vl.h => src/bitarr_vl.h +33 -0
@@ 0,0 1,33 @@
#ifndef BITARR_VL_
#define BITARR_VL_

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

#include "common.h"
#include "bitops.h"

/**
 * @struct VLBitArray
 * @brief Variable-length (gamma-coded) bit array with sampled pointers
 *
 * VLBitArray_init packs one gamma code per input value into W and stores the
 * starting bit offset of every k-th code in P.
 */
typedef struct {
    size_t k;             // Sampling interval: one entry of P per k elements
    size_t length;        // Number of elements of the original array A
    size_t logical_size;  // Total number of encoded bits stored in W
    size_t physical_size; // Number of words in W
    size_t element_size;  // Size of each word in W, in bits
    uint32_t *W;          // Packed gamma codes (separately allocated)
    size_t P[];           // Bit offsets into W of elements 0, k, 2k, ...
} VLBitArray;


/**
 * @brief Free a VLBitArray
 *
 * Releases the word array W and then the struct itself; the pointer table P
 * is part of the struct allocation.
 *
 * @param bit_arr Pointer to VLBitArray
 */
void VLBitArray_free(VLBitArray *bit_arr);

/**
 * @brief Create a variable-length bit array from A
 *
 * Every A[i] is stored as the gamma code of A[i] + 1, and the codes are
 * packed back to back. The bit offset of every k-th code is recorded so that
 * reads can start decoding near the requested element.
 *
 * @param A       Values to encode
 * @param length  Number of values in A
 * @param k       Sampling interval for the stored bit offsets
 * @param size    Word size in bytes (converted to bits internally)
 * @return        Pointer to VLBitArray; release with VLBitArray_free
 */
VLBitArray *VLBitArray_init(
    unsigned int A[], size_t length, size_t k, size_t size
);


/**
 * @brief Get value from original array at index i
 *
 * Starts at the sampled bit offset for the group containing i, steps over
 * the codes in front of element i, and gamma-decodes the code found there.
 * An index >= length prints an error and exits with OUT_OF_BOUNDS.
 *
 * @param bit_arr Pointer to VLBitArray
 * @param i       Index in the original array
 * @return        Value at A[i]
 */
uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i);

#endif // !BITARR_VL_

M src/bitops.c => src/bitops.c +81 -75
@@ 3,118 3,124 @@
 * @brief Operations for BitArray
 */

#include "bitops.h"
#include <stdint.h>

#include "bitarr.h"

extern inline unsigned int sig_bit_idx(unsigned int j, unsigned int word_size)
uint32_t find_LSB(uint32_t v)
{
  return ((j) % word_size) + 1;
    // modified from:
    // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
    static const uint32_t Mod37BitPosition[] = {
      32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4,
      7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, 5,
      20, 8, 19, 18
    };

    // map a bit value mod 37 to its position
    return Mod37BitPosition[(-v & v) % 37];
}

// -- Single bit ops ----------------------------------------------------------

// Read the single bit at bit index j: select word j / width, shift the wanted
// bit (j % width) down to position 0 and mask everything else off.
unsigned int BitArray_bitread(BitArray* bit_arr, unsigned int j) {
  return (bit_arr->v[j/bit_arr->width] >> (j % bit_arr->width)) & 1;
}

  
void BitArray_bitset(BitArray* bit_arr, unsigned int j)
uint32_t find_MSB(uint32_t v)
{
  // Shift word left to bit idx, OR w/ 1
  bit_arr->v[j/bit_arr->width] |= 1 << (j % bit_arr->width);
}

void BitArray_bitclear(BitArray* bit_arr, unsigned int j)
{
  // Shift word left to bit idx, AND w/ NOT(1)
  bit_arr->v[j/bit_arr->width] &= ~(1 << (j % bit_arr->width));
}
    static const uint32_t MultiplyDeBruijnBitPosition[32] =
        {
            0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
            8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
        };

    v |= v >> 1; // first round down to one less than a power of 2
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;

// -- Reading -----------------------------------------------------------------
    return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];
}

unsigned int BitArray_bitsread(
  BitArray* bit_arr, unsigned int j1, unsigned int j
)
{
  if (j1 > j) return 0; // Early return if start idx > end idx

  // Confined w/in single word
  if (j1 / bit_arr->width == j / bit_arr->width) { 
    return ( 
      // Shift word right
      (bit_arr->v[j/bit_arr->width] >> (j1 % bit_arr->width)) & 
      // AND on bit vector of 1s the necessary length to extract only needed 
      // bits 
      ((1 << (j-j1+1)) - 1)
    );
  }

  // Spans two words
  return (
  // Get bits in the first word
  (bit_arr->v[j1/bit_arr->width] >> (j1 % bit_arr->width)) | 
  // Bits in second word
  (bit_arr->v[j/bit_arr->width] &  ((1 << ((j+1) % bit_arr->width)) - 1)) <<
    // Shift bits from second word n bits from first word left to make 
    // room for concatenation
    (bit_arr->width - (j1 % bit_arr->width))
  );
// -- Writing -----------------------------------------------------------------
/**
 * @brief Set bit at index `j`
 *
 * Sets the bit to 1; if it is already 1 nothing changes.
 *
 * @param bit_arr   Word array holding the bits
 * @param size      Number of bits per word
 * @param j         Bit index
 */
void bit_set(uint32_t *bit_arr, size_t size, size_t j)
{
  // Build the mask as uint32_t: a plain `1` is a signed int, and shifting it
  // into bit 31 is undefined behaviour.
  bit_arr[j/size] |= (uint32_t) 1 << (j % size);
}


unsigned int BitArray_read(BitArray* bit_arr, unsigned int i)
void bit_clear(uint32_t* bit_arr, size_t size, size_t j)
{
  if (i >= bit_arr->n) {
    fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
    exit(OUT_OF_BOUNDS);
  }
  return BitArray_bitsread(bit_arr, i*bit_arr->element_size,
    (i+1)*bit_arr->element_size-1); 
  // Shift word left to bit idx, AND w/ NOT(1)
  bit_arr[j/size] &= ~(1 << (j % size));
}

// -- Writing -----------------------------------------------------------------

void BitArray_bitswrite(
  BitArray* bit_arr, unsigned int j1, unsigned int j, unsigned int x
void bit_write_range(
  uint32_t *bit_arr, size_t w, unsigned int j1, unsigned int j, unsigned int x
)
{
  if (j1 > j) return; // Early return if start idx > end idx
  unsigned int w = bit_arr->width;

  // Confined w/in single word
  if (j1 / bit_arr->width == j / bit_arr->width) { 
  if (j1 / w == j / w) {
    // Clear bits
    bit_arr->v[j/bit_arr->width] &= (
      ~((unsigned) ((1 << (j-j1+1)) - 1) << (j1 % bit_arr->width))
    bit_arr[j/w] &= (
      ~((unsigned) ((1 << (j-j1+1)) - 1) << (j1 % w))
    );
    // Write x bits
    bit_arr->v[j/bit_arr->width] |= x << (j1 % bit_arr->width); 
  } else {  
    bit_arr[j/w] |= x << (j1 % w);
  } else {
    // Spans two words
    bit_arr->v[j1/bit_arr->width] = (
    bit_arr[j1/w] = (
      // Get bits in first word to store lower bits
      (bit_arr->v[j1/bit_arr->width] & ((1 << (j1 % bit_arr->width)) - 1)) | 
      // Write bits 
      (x <<  (j1 % bit_arr->width))
      (bit_arr[j1/w] & ((1 << (j1 % w)) - 1)) |
      // Write bits
      (x <<  (j1 % w))
    );
    //
    bit_arr->v[j/bit_arr->width] = (

    bit_arr[j/w] = (
      // Get bits in second word to store lower bits
      (bit_arr->v[j/bit_arr->width] & ~((1 << ((j+1) % w)) - 1)) | 
      // Write bits 
      (bit_arr[j/w] & ~((1 << ((j+1) % w)) - 1)) |
      // Write bits
      (x >> (w - (j1 % w)))
    );
  }
}


void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x)
// -- Reading -----------------------------------------------------------------
unsigned bit_read(uint32_t *bit_arr, size_t size, size_t j)
{
  if (i >= bit_arr->n) {
    fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
    exit(OUT_OF_BOUNDS);
  return (bit_arr[j/size] >> (j % size)) & 1;
}

unsigned int bit_read_range(
  uint32_t *bit_arr, size_t width, unsigned int j1, unsigned int j
)
{
  if (j1 > j) return 0; // Early return if start idx > end idx

  // Confined w/in single word
  if (j1 / width == j / width) {
    return (
      // Shift word right
      (bit_arr[j/width] >> (j1 % width)) &
      // AND on bit vector of 1s the necessary length to extract only needed
      // bits
      ((1 << (j-j1+1)) - 1)
    );
  }
  BitArray_bitswrite(bit_arr, i*bit_arr->element_size, 
    (i+1)*bit_arr->element_size-1, x); 

  // Spans two words
  return (
  // Get bits in the first word
  (bit_arr[j1/width] >> (j1 % width)) |
  // Bits in second word
  (bit_arr[j/width] &  ((1 << ((j+1) % width)) - 1)) <<
    // Shift bits from second word n bits from first word left to make
    // room for concatenation
    (width - (j1 % width))
  );
}


M src/bitops.h => src/bitops.h +13 -85
@@ 2,95 2,23 @@
#define BITOPS_H_

#include <stdlib.h>
#include "bitarr.h"
#include <stdint.h>
#include "common.h"


/**
 * @brief Find significant bit of index j in compressed entry
 *
 * @param j 
 * @param word_size 
 * @return bit index
 */
extern inline unsigned int sig_bit_idx(unsigned int j, unsigned int word_size);
uint32_t find_LSB(uint32_t v);
uint32_t find_MSB(uint32_t v);

/**
 * @brief Read single bit at index `j`
 *
 * @param bit_arr   Pointer to BitArray
 * @param j         Index 
 * @return          bit value
 */
unsigned int BitArray_bitread(BitArray* bit_arr, unsigned int j);
void bit_set(uint32_t *bit_arr, size_t size, size_t j);
void bit_clear(uint32_t *bit_arr, size_t size, size_t j);
void bit_write_range(
  uint32_t *bit_arr, size_t w, unsigned int j1, unsigned int j, unsigned int x
);

/**
 * @brief Set bit at index `j`
 *
 * Sets the the bit = 1 at index `j`. If bit is already equal to 1 then nothing
 * happens.
 *
 * @param bit_arr   Pointer to BitArray
 * @param j         Index 
 */
void BitArray_bitset(BitArray* bit_arr, unsigned int j);
unsigned bit_read(uint32_t *bit_arr, size_t size, size_t j);

/**
 * @brief Clears bit at index `j`
 *
 * Sets the the bit = 0 at index `j`. If bit is already equal to 0 then nothing
 * happens.
 *
 * @param bit_arr   Pointer to BitArray
 * @param j         Index 
 */
void BitArray_bitclear(BitArray* bit_arr, unsigned int j);

/**
 * @brief Reads range of bits B[j1, j]
 *
 * @param bit_arr 
 * @param j1 
 * @param j 
 * @return Integer constructed from bits [j1, j] 
 */
unsigned int BitArray_bitsread(BitArray* bit_arr, unsigned int j1, unsigned int j);


/**
 * @brief Get value from original array at index i
 *
 * The array held within a BitArray is a compact version of the original.
 * We can retrieve this original value by reading the bits from the range 
 * [i * l, (i+1)*l-1] in the compact array.
 *
 * @param bit_arr 
 * @param i 
 * @return Value at A[i]
 */
unsigned int BitArray_read(BitArray* bit_arr, unsigned int i);


/**
 * @brief Write to range of bits
 *
 * Abstract function, most will want to use BitArray_write instead
 *
 * @param bit_arr Pointer to BitArray
 * @param j1      Starting index for virtual bit array
 * @param j       Ending index for virtual bit array
 * @param x       Integer to write
 */
void BitArray_bitswrite(BitArray* bit_arr, unsigned int j1, unsigned int j, unsigned int x);



/**
 * @brief Write value to compact bit representation of array
 *
 * @param bit_arr Pointer to BitArray
 * @param i       Index in array to write
 * @param x       Integer to write
 */
void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x);
unsigned int bit_read_range(
  uint32_t *bit_arr, size_t width, unsigned int j1, unsigned int j
);

#endif // !BITOPS_H_

A src/common.h => src/common.h +12 -0
@@ 0,0 1,12 @@
#ifndef COMMON_H_
#define COMMON_H_

// Integer ceiling of x / y for x >= 1 and y >= 1. Arguments are parenthesized
// so that expressions such as ceil_int(a + b, c * d) expand correctly.
#define ceil_int(x, y) (1 + ((x) - 1) / (y))

// Status codes; the non-zero ones are also used as process exit codes.
typedef enum {
  BITARR_SUCCESS = 0,
  OUT_OF_BOUNDS  = 1, // Indexing error
  FILE_ERROR     = 2  // I/O Error
} BITARR_ERROR;

#endif
\ No newline at end of file

M src/encoding.c => src/encoding.c +2 -21
@@ 5,25 5,6 @@

#include "encoding.h"

#include <stdint.h>
#include <stdio.h>


uint32_t count_trailing_zeros(unsigned int v)
{
    // Lookup technique adapted from:
    // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
    //
    // Every 32-bit power of two has a distinct residue mod 37, so the residue
    // of the isolated lowest set bit indexes straight to its position.
    // Residue 0 only occurs for v == 0, which maps to 32.
    static const uint32_t position_by_residue[37] = {
        32,  0,  1, 26,  2, 23, 27,  0,  3, 16,
        24, 30, 28, 11,  0, 13,  4,  7, 17,  0,
        25, 22, 31, 15, 29, 10, 12,  6,  0, 21,
        14,  9,  5, 20,  8, 19, 18
    };

    // Two's-complement trick: v & -v keeps only the lowest set bit.
    const unsigned int lowest_set = v & (0u - v);

    return position_by_residue[lowest_set % 37];
}


unsigned int unary_encode(uint32_t k)
{
    // code 1 . 0 k times (e.g. 3 := 1 . 000


@@ 39,7 20,7 @@ unsigned int gamma_encode(uint32_t k)
    length = (uint32_t) log2(k);
    offset = k - (1 << length);

    // Unary coded offset . length 
    // Unary coded offset . length
    // e.g. 13 (l = 3, o = 5) := 101.1000
    return (offset << (length+1) | ((1 << length)));
}


@@ 49,7 30,7 @@ unsigned int gamma_decode(unsigned int k)
    uint32_t l = 0;
    uint32_t o = 0;

    l = count_trailing_zeros(k);
    l = find_LSB(k); // idx of LSB set to 1
    k >>= l;

    // Mask for l bits

M src/encoding.h => src/encoding.h +1 -0
@@ 3,6 3,7 @@

#include <stdint.h>
#include <math.h>
#include "bitops.h"




M tests/tests.c => tests/tests.c +39 -12
@@ 6,6 6,7 @@
#include "../src/bitops.h"
#include "../src/bitarr_io.h"
#include "../src/encoding.h"
#include "../src/bitarr_vl.h"





@@ 15,9 16,10 @@ BEGIN_TESTING

// -- Data --------------------------------------------------------------------
unsigned int A[10] = { 20, 18, 22, 22, 16, 21, 11, 22, 21, 21 };
unsigned int A_vl[10] = { 0, 1, 0, 2, 5, 1, 3, 2, 8, 2 };

/*
* Both of binary representations of b have been flipped since when reading 
* Both of binary representations of b have been flipped since when reading
* individual bits the most from array A, the least significant bit will be read
* first from each int.
*


@@ 61,7 63,7 @@ TEST("single bit read")
{
    unsigned int b;
    for (unsigned int i = 0; i < 64; ++i) {
        b = BitArray_bitread(bit_arr, i);
        b = bit_read(bit_arr->v, bit_arr->width, i);
        assert(b == B_sig_ordered[i]);
    }
    printf("✔ bit read passed\n");


@@ 71,26 73,26 @@ TEST("bit set & clear")
{
    unsigned int og_bit, nu_bit, idx;
    idx = 2;
    og_bit = BitArray_bitread(bit_arr, 2);
    
    og_bit = bit_read(bit_arr->v, bit_arr->width, idx);

    // 1 -> 1
    BitArray_bitset(bit_arr, idx);
    nu_bit = BitArray_bitread(bit_arr, idx);
    bit_set(bit_arr->v, bit_arr->width, idx);
    nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
    assert((og_bit & nu_bit) == 1);

    // 1 -> 0
    BitArray_bitclear(bit_arr, idx);
    nu_bit = BitArray_bitread(bit_arr, idx);
    bit_clear(bit_arr->v, bit_arr->width, idx);
    nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
    assert(nu_bit == 0);

    // 0 -> 0
    BitArray_bitclear(bit_arr, idx);
    nu_bit = BitArray_bitread(bit_arr, idx);
    bit_clear(bit_arr->v, bit_arr->width, idx);
    nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
    assert(nu_bit == 0);

    // 0 -> 1 (back to original)
    BitArray_bitset(bit_arr, idx);
    nu_bit = BitArray_bitread(bit_arr, idx);
    bit_set(bit_arr->v, bit_arr->width, idx);
    nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
    assert((og_bit & nu_bit) == 1);
    printf("✔ bit set/clear passed\n");
}


@@ 144,6 146,31 @@ TEST("Gamma encoding")
    printf("✔ Gamma coding\n");
}

// Round-trips two inputs through the variable-length bit array: a small one
// whose packed words are known, and a sparse one that forces long skips
// (k = 30) and wide codes.
TEST("VL BitArray")
{


    uint32_t correct_W_vla[2] = { 415519957, 3 };
    VLBitArray *vlb = VLBitArray_init(A_vl, 10, 4, sizeof(uint32_t));
    uint32_t AA[31] = {1, 100, 200, 11, 1, 50, 1000};
    AA[20] = 20;
    AA[30] = 10000;
    VLBitArray *vlb1 = VLBitArray_init(AA, 31, 30, sizeof(uint32_t));

    for (size_t i = 0; i < 2; i++) {
        assert(correct_W_vla[i] == vlb->W[i]);
    }
    for (size_t i = 0; i < 10; ++i) assert(A_vl[i] == VLBitArray_read(vlb, i));

    VLBitArray_free(vlb);

    for (size_t i = 0; i < 31; ++i) {
        assert(AA[i] == VLBitArray_read(vlb1, i));
    }

    // Release the second array as well; it was previously leaked.
    VLBitArray_free(vlb1);

    printf("✔ Variable Length BitArray\n");
}