M src/bitarr.c => src/bitarr.c +24 -0
@@ 31,3 31,27 @@ BitArray* BitArray_init(unsigned int A[], uint32_t n, uint8_t element_size, size
for (i = 0; i < n; ++i) BitArray_write(bit_arr, i, A[i]);
return bit_arr;
}
+
+
+/**
+ * Fetch element i of the packed array.
+ *
+ * Element i occupies the inclusive bit range
+ * [i*element_size, (i+1)*element_size - 1] of bit_arr->v. An index at or past
+ * bit_arr->n prints a diagnostic to stderr and terminates the process with
+ * status OUT_OF_BOUNDS.
+ */
+unsigned int BitArray_read(BitArray* bit_arr, unsigned int i)
+{
+    if (!(i < bit_arr->n)) {
+        fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
+        exit(OUT_OF_BOUNDS);
+    }
+
+    // Inclusive bit range that holds element i
+    const unsigned int first_bit = i*bit_arr->element_size;
+    const unsigned int last_bit = (i+1)*bit_arr->element_size-1;
+
+    return bit_read_range(bit_arr->v, bit_arr->width, first_bit, last_bit);
+}
+
+// -- Writing -----------------------------------------------------------------
+/**
+ * Store x as element i of the packed array.
+ *
+ * The value is written to the inclusive bit range
+ * [i*element_size, (i+1)*element_size - 1] of bit_arr->v. An index at or past
+ * bit_arr->n prints a diagnostic to stderr and terminates the process with
+ * status OUT_OF_BOUNDS.
+ */
+void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x)
+{
+    if (!(i < bit_arr->n)) {
+        fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
+        exit(OUT_OF_BOUNDS);
+    }
+
+    // Inclusive bit range that holds element i
+    const unsigned int first_bit = i*bit_arr->element_size;
+    const unsigned int last_bit = (i+1)*bit_arr->element_size-1;
+
+    bit_write_range(bit_arr->v, bit_arr->width, first_bit, last_bit, x);
+}
+
+
M src/bitarr.h => src/bitarr.h +26 -7
@@ 64,14 64,10 @@
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
+#include "bitops.h"
+#include "common.h"
-typedef enum {
- BITARR_SUCCESS,
- OUT_OF_BOUNDS, // Indexing error
- FILE_ERROR // I/O Error
-} BITARR_ERROR;
-
/**
* @struct BitArray
@@ 120,8 116,31 @@ void BitArray_free(BitArray *bitarr);
* @param l Maximum number of bits for each element in A
* @return pointer to BitArray
*/
-BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size,
+BitArray* BitArray_init(unsigned int A[], uint32_t length, uint8_t element_size,
size_t word_size);
+
+/**
+ * @brief Get value from original array at index i
+ *
+ * The array held within a BitArray is a compact version of the original.
+ * We can retrieve this original value by reading the bits from the range
+ * [i * l, (i+1)*l-1] in the compact array, where l is the number of bits
+ * used per element.
+ *
+ * @param bit_arr Pointer to BitArray
+ * @param i Index in array to read; an index past the last element prints an
+ * error and exits the process with OUT_OF_BOUNDS
+ * @return Value at A[i]
+ */
+unsigned int BitArray_read(BitArray* bit_arr, unsigned int i);
+
+/**
+ * @brief Write value to compact bit representation of array
+ *
+ * @param bit_arr Pointer to BitArray
+ * @param i Index in array to write
+ * @param x Integer to write
+ */
+void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x);
+
#endif // BITARR_H_
A src/bitarr_vl.c => src/bitarr_vl.c +102 -0
@@ 0,0 1,102 @@
+#include "bitarr_vl.h"
+#include "bitops.h"
+#include "encoding.h"
+
+/**
+ * Release a VLBitArray and the word buffer W it owns.
+ *
+ * Passing NULL is a no-op, mirroring free().
+ */
+void VLBitArray_free(VLBitArray *bit_arr)
+{
+    if (bit_arr == NULL) return;
+
+    free(bit_arr->W);
+    free(bit_arr);
+}
+
+// Number of bits below the leading 1 of v (floor(log2(v))), for v >= 1.
+static uint32_t vlb_gamma_length(uint32_t v)
+{
+    return (uint32_t) log2(v);
+}
+
+/**
+ * Build a gamma-coded, variable-length bit array from A.
+ *
+ * Each value A[i] is stored as the gamma code of A[i] + 1 (a gamma code cannot
+ * represent zero). Codes are packed back to back into W, and the start bit of
+ * every k-th element is recorded in P.
+ *
+ * @param A      Values to encode. Codes are assembled in 32 bits, so every
+ *               value must be <= 65534.
+ * @param length Number of elements in A
+ * @param k      Sampling interval for the pointer array P (must be > 0)
+ * @param size   Size of one word of W in bytes (must be > 0)
+ * @return Newly allocated array, to be released with VLBitArray_free(); NULL
+ *         if an argument is invalid, a value is too large to encode, or an
+ *         allocation fails.
+ */
+VLBitArray *VLBitArray_init(unsigned int A[], size_t length, size_t k, size_t size)
+{
+    // A code is 2 * g_length + 1 bits wide, so g_length must stay <= 15 for
+    // the code of (value + 1) to fit the 32-bit argument of bit_write_range.
+    const unsigned int max_value = 65534u;
+
+    if (k == 0 || size == 0) return NULL;
+
+    // bytes -> bits
+    size_t size_bits = size * 8;
+
+    // First pass: validate the input and total up the bits required
+    size_t total_bits = 0;
+    for (size_t i = 0; i < length; ++i) {
+        if (A[i] > max_value) return NULL;
+        total_bits += (size_t) vlb_gamma_length(A[i] + 1) * 2 + 1;
+    }
+
+    // One sampled position per started group of k elements
+    size_t p_len = (length == 0) ? 0 : ceil_int(length, k);
+
+    // Allocate struct and pointer array together
+    VLBitArray *vlb = calloc(1, sizeof *vlb + sizeof vlb->P[0] * p_len);
+    if (vlb == NULL) return NULL;
+
+    // Number of words needed to fit the total number of bits
+    size_t max_idx = (total_bits == 0) ? 0 : ceil_int(total_bits, size_bits);
+    if (max_idx > 0) {
+        // bit_write_range indexes W as uint32_t words, so never allocate less
+        // than that per word. The buffer must start zeroed: the writes below
+        // are read-modify-write and the unused tail bits end up in W.
+        size_t word_bytes = size > sizeof *vlb->W ? size : sizeof *vlb->W;
+        vlb->W = calloc(max_idx, word_bytes);
+        if (vlb->W == NULL) {
+            free(vlb);
+            return NULL;
+        }
+    }
+
+    // Second pass: write the codes straight into W
+    size_t current_p_pos = 0;
+    for (size_t i = 0, j = 0; i < length; ++i) {
+        // Code layout, low bit first: g_length zeros, a 1, then g_offset
+        uint32_t value = A[i] + 1;
+        uint32_t g_length = vlb_gamma_length(value);
+        uint32_t g_offset = value - ((uint32_t) 1 << g_length);
+        uint32_t code = (g_offset << (g_length + 1)) | ((uint32_t) 1 << g_length);
+        size_t p_increment = (size_t) g_length * 2 + 1;
+
+        // Assign current bit idx to pointer array
+        if (i % k == 0) vlb->P[j++] = current_p_pos;
+
+        // The range is inclusive, so the last bit is start + width - 1
+        bit_write_range(
+            vlb->W,
+            size_bits,
+            current_p_pos,
+            current_p_pos + p_increment - 1,
+            code
+        );
+        current_p_pos += p_increment;
+    }
+
+    // Set struct members
+    vlb->k = k;
+    vlb->length = length;
+    vlb->logical_size = current_p_pos;
+    vlb->physical_size = max_idx;
+    vlb->element_size = size_bits;
+
+    return vlb;
+}
+
+
+/**
+ * Decode element i of a gamma-coded array.
+ *
+ * Decoding starts at the sampled bit position of the group that contains i
+ * and steps over the codes in front of it one at a time.
+ *
+ * @param bit_arr Array to read from
+ * @param i       Index of the element; an index >= length prints a diagnostic
+ *                to stderr and terminates the process with OUT_OF_BOUNDS
+ * @return Decoded value minus the 1 that was added before encoding
+ */
+uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i)
+{
+    if (i >= bit_arr->length) {
+        fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
+        exit(OUT_OF_BOUNDS);
+    }
+
+    // P holds the start bit of every k-th element
+    size_t curr_idx = bit_arr->P[i / bit_arr->k];
+    size_t idx_diff = i % bit_arr->k;   // codes to step over inside the group
+    uint32_t chunk = 0;
+
+    for (size_t step = 0; step <= idx_diff; ++step) {
+        // Read a window of up to element_size - 1 bits starting at curr_idx,
+        // clipped to the last bit that was written
+        size_t max_idx = curr_idx + bit_arr->element_size - 1;
+        size_t last_bit = (max_idx > bit_arr->logical_size)
+            ? bit_arr->logical_size - 1
+            : max_idx - 1;
+
+        chunk = bit_read_range(
+            bit_arr->W,
+            bit_arr->element_size,
+            curr_idx,
+            last_bit
+        );
+
+        // The lowest set bit of a code sits at index g_length, and the whole
+        // code is 2 * g_length + 1 bits wide
+        curr_idx += 1 + ((find_LSB(chunk)) * 2);
+    }
+
+    return (gamma_decode(chunk) - 1);
+}
+
A src/bitarr_vl.h => src/bitarr_vl.h +33 -0
@@ 0,0 1,33 @@
+#ifndef BITARR_VL_
+#define BITARR_VL_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "common.h"
+#include "bitops.h"
+
+/**
+ * @struct VLBitArray
+ * @brief Array of gamma-coded values packed into words, with sampled pointers
+ */
+typedef struct {
+ size_t k; // Sampling interval: P gets one entry every k elements
+ size_t length; // Length of A
+ size_t logical_size; // Length of B (total number of bits written)
+ size_t physical_size; // Length of W (number of words)
+ size_t element_size; // Size of each word in W, in bits
+ uint32_t *W; // Packed gamma codes, released by VLBitArray_free
+ size_t P[]; // Start bit of elements 0, k, 2k, ... (flexible array member)
+} VLBitArray;
+
+
+/**
+ * @brief Free a VLBitArray together with its word buffer W
+ *
+ * @param bit_arr Pointer to VLBitArray
+ */
+void VLBitArray_free(VLBitArray *bit_arr);
+
+/**
+ * @brief Build a variable-length bit array from A using gamma codes
+ *
+ * Every value is stored as the gamma code of the value plus one, and the bit
+ * position of every k-th element is recorded in P.
+ *
+ * @param A Array of values to encode
+ * @param length Number of elements in A
+ * @param k Sampling interval for the pointer array P
+ * @param size Size of one word of W in bytes
+ * @return pointer to VLBitArray
+ */
+VLBitArray *VLBitArray_init(
+ unsigned int A[], size_t length, size_t k, size_t size
+);
+
+
+/**
+ * @brief Get value from original array at index i
+ *
+ * @param bit_arr Pointer to VLBitArray
+ * @param i Index in array to read; an index >= length prints an error and
+ * exits the process with OUT_OF_BOUNDS
+ * @return Value at A[i]
+ */
+uint32_t VLBitArray_read(VLBitArray* bit_arr, size_t i);
+
+#endif // !BITARR_VL_
M src/bitops.c => src/bitops.c +81 -75
@@ 3,118 3,124 @@
* @brief Operations for BitArray
*/
+#include "bitops.h"
+#include <stdint.h>
-#include "bitarr.h"
-extern inline unsigned int sig_bit_idx(unsigned int j, unsigned int word_size)
+/**
+ * @brief Index of the lowest set bit of v (its number of trailing zero bits)
+ *
+ * (-v & v) keeps only the lowest set bit of v; the remainder of that power of
+ * two mod 37 indexes the lookup table. Returns 32 when v is 0 (table entry 0).
+ *
+ * @param v Value to inspect
+ * @return Bit index in [0, 31], or 32 for v == 0
+ */
+uint32_t find_LSB(uint32_t v)
{
- return ((j) % word_size) + 1;
+ // modified from:
+ // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
+ static const uint32_t Mod37BitPosition[] = {
+ 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4,
+ 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, 5,
+ 20, 8, 19, 18
+ };
+
+ // map a bit value mod 37 to its position
+ return Mod37BitPosition[(-v & v) % 37];
}
-// -- Single bit ops ----------------------------------------------------------
-unsigned int BitArray_bitread(BitArray* bit_arr, unsigned int j) {
- return (bit_arr->v[j/bit_arr->width] >> (j % bit_arr->width)) & 1;
-}
-
-
-void BitArray_bitset(BitArray* bit_arr, unsigned int j)
+/**
+ * @brief Index of the highest set bit of v
+ *
+ * The OR/shift cascade copies the highest set bit into every lower position;
+ * the multiply and the top five bits of the product then index the lookup
+ * table. Returns 0 when v is 0 (table entry 0), the same result as for v == 1.
+ *
+ * @param v Value to inspect
+ * @return Bit index in [0, 31]
+ */
+uint32_t find_MSB(uint32_t v)
{
- // Shift word left to bit idx, OR w/ 1
- bit_arr->v[j/bit_arr->width] |= 1 << (j % bit_arr->width);
-}
-void BitArray_bitclear(BitArray* bit_arr, unsigned int j)
-{
- // Shift word left to bit idx, AND w/ NOT(1)
- bit_arr->v[j/bit_arr->width] &= ~(1 << (j % bit_arr->width));
-}
+ static const uint32_t MultiplyDeBruijnBitPosition[32] =
+ {
+ 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
+ };
+ v |= v >> 1; // first round down to one less than a power of 2
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
-// -- Reading -----------------------------------------------------------------
+ // Top 5 bits of the 32-bit product select the table entry
+ return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];
+}
-unsigned int BitArray_bitsread(
- BitArray* bit_arr, unsigned int j1, unsigned int j
-)
-{
- if (j1 > j) return 0; // Early return if start idx > end idx
- // Confined w/in single word
- if (j1 / bit_arr->width == j / bit_arr->width) {
- return (
- // Shift word right
- (bit_arr->v[j/bit_arr->width] >> (j1 % bit_arr->width)) &
- // AND on bit vector of 1s the necessary length to extract only needed
- // bits
- ((1 << (j-j1+1)) - 1)
- );
- }
-
- // Spans two words
- return (
- // Get bits in the first word
- (bit_arr->v[j1/bit_arr->width] >> (j1 % bit_arr->width)) |
- // Bits in second word
- (bit_arr->v[j/bit_arr->width] & ((1 << ((j+1) % bit_arr->width)) - 1)) <<
- // Shift bits from second word n bits from first word left to make
- // room for concatenation
- (bit_arr->width - (j1 % bit_arr->width))
- );
+// -- Writing -----------------------------------------------------------------
+/**
+ * @brief Set the bit at index j to 1
+ *
+ * @param bit_arr Word array holding the bits
+ * @param size Number of bits per word
+ * @param j Bit index
+ */
+void bit_set(uint32_t *bit_arr, size_t size, size_t j)
+{
+    // Shift an unsigned 1 so bit 31 is reachable without signed overflow
+    bit_arr[j/size] |= (uint32_t) 1 << (j % size);
}
-unsigned int BitArray_read(BitArray* bit_arr, unsigned int i)
+/**
+ * @brief Clear the bit at index j to 0
+ *
+ * @param bit_arr Word array holding the bits
+ * @param size Number of bits per word
+ * @param j Bit index
+ */
+void bit_clear(uint32_t* bit_arr, size_t size, size_t j)
{
- if (i >= bit_arr->n) {
- fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
- exit(OUT_OF_BOUNDS);
- }
- return BitArray_bitsread(bit_arr, i*bit_arr->element_size,
- (i+1)*bit_arr->element_size-1);
+    // Build the mask from an unsigned 1 so bit 31 is reachable without
+    // signed overflow
+    bit_arr[j/size] &= ~((uint32_t) 1 << (j % size));
}
-// -- Writing -----------------------------------------------------------------
-void BitArray_bitswrite(
- BitArray* bit_arr, unsigned int j1, unsigned int j, unsigned int x
+/**
+ * @brief Write x into the inclusive bit range [j1, j]
+ *
+ * Handles a range that lies inside one word or straddles two words of w bits.
+ * Does nothing when j1 > j. x is OR-ed in without being masked to the width of
+ * the range, so it has to fit in j - j1 + 1 bits.
+ *
+ * NOTE(review): the masks are built with `1 << n` on a signed int, which is
+ * not well defined once n reaches 31 — confirm callers keep ranges narrower.
+ *
+ * @param bit_arr Word array to write into
+ * @param w Number of bits per word
+ * @param j1 First bit index of the range
+ * @param j Last bit index of the range (inclusive)
+ * @param x Integer to write
+ */
+void bit_write_range(
+ uint32_t *bit_arr, size_t w, unsigned int j1, unsigned int j, unsigned int x
)
{
if (j1 > j) return; // Early return if start idx > end idx
- unsigned int w = bit_arr->width;
// Confined w/in single word
- if (j1 / bit_arr->width == j / bit_arr->width) {
+ if (j1 / w == j / w) {
// Clear bits
- bit_arr->v[j/bit_arr->width] &= (
- ~((unsigned) ((1 << (j-j1+1)) - 1) << (j1 % bit_arr->width))
+ bit_arr[j/w] &= (
+ ~((unsigned) ((1 << (j-j1+1)) - 1) << (j1 % w))
);
// Write x bits
- bit_arr->v[j/bit_arr->width] |= x << (j1 % bit_arr->width);
- } else {
+ bit_arr[j/w] |= x << (j1 % w);
+ } else {
// Spans two words
- bit_arr->v[j1/bit_arr->width] = (
+ bit_arr[j1/w] = (
// Get bits in first word to store lower bits
- (bit_arr->v[j1/bit_arr->width] & ((1 << (j1 % bit_arr->width)) - 1)) |
- // Write bits
- (x << (j1 % bit_arr->width))
+ (bit_arr[j1/w] & ((1 << (j1 % w)) - 1)) |
+ // Write bits
+ (x << (j1 % w))
);
- //
- bit_arr->v[j/bit_arr->width] = (
+
+ // Second word: keep the bits above the range, fill in the rest of x
+ bit_arr[j/w] = (
// Get bits in second word to store lower bits
- (bit_arr->v[j/bit_arr->width] & ~((1 << ((j+1) % w)) - 1)) |
- // Write bits
+ (bit_arr[j/w] & ~((1 << ((j+1) % w)) - 1)) |
+ // Write bits
(x >> (w - (j1 % w)))
);
}
}
-void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x)
+// -- Reading -----------------------------------------------------------------
+/**
+ * @brief Read the single bit at index j
+ *
+ * @param bit_arr Word array holding the bits
+ * @param size Number of bits per word
+ * @param j Bit index
+ * @return 0 or 1
+ */
+unsigned bit_read(uint32_t *bit_arr, size_t size, size_t j)
{
- if (i >= bit_arr->n) {
- fprintf(stderr, "%s:%d Out of bounds index\n", __FILE__, __LINE__);
- exit(OUT_OF_BOUNDS);
+    // Pick the word, then bring the wanted bit down to position 0
+    const uint32_t word = bit_arr[j/size];
+    return (word >> (j % size)) & 1;
+}
+
+/**
+ * @brief Read the inclusive bit range [j1, j] as an integer
+ *
+ * Bit j1 ends up in bit 0 of the result. Handles a range that lies inside one
+ * word or straddles two words of `width` bits. Returns 0 when j1 > j.
+ *
+ * NOTE(review): the masks are built with `1 << n` on a signed int, which is
+ * not well defined once n reaches 31 — confirm callers keep ranges narrower.
+ *
+ * @param bit_arr Word array to read from
+ * @param width Number of bits per word
+ * @param j1 First bit index of the range
+ * @param j Last bit index of the range (inclusive)
+ * @return Integer constructed from bits [j1, j]
+ */
+unsigned int bit_read_range(
+ uint32_t *bit_arr, size_t width, unsigned int j1, unsigned int j
+)
+{
+ if (j1 > j) return 0; // Early return if start idx > end idx
+
+ // Confined w/in single word
+ if (j1 / width == j / width) {
+ return (
+ // Shift word right
+ (bit_arr[j/width] >> (j1 % width)) &
+ // AND on bit vector of 1s the necessary length to extract only needed
+ // bits
+ ((1 << (j-j1+1)) - 1)
+ );
}
- BitArray_bitswrite(bit_arr, i*bit_arr->element_size,
- (i+1)*bit_arr->element_size-1, x);
+
+ // Spans two words
+ return (
+ // Get bits in the first word
+ (bit_arr[j1/width] >> (j1 % width)) |
+ // Bits in second word
+ (bit_arr[j/width] & ((1 << ((j+1) % width)) - 1)) <<
+ // Shift bits from second word n bits from first word left to make
+ // room for concatenation
+ (width - (j1 % width))
+ );
}
+
M src/bitops.h => src/bitops.h +13 -85
@@ 2,95 2,23 @@
#define BITOPS_H_
#include <stdlib.h>
-#include "bitarr.h"
+#include <stdint.h>
+#include "common.h"
-/**
- * @brief Find significant bit of index j in compressed entry
- *
- * @param j
- * @param word_size
- * @return bit index
- */
-extern inline unsigned int sig_bit_idx(unsigned int j, unsigned int word_size);
+uint32_t find_LSB(uint32_t v);
+uint32_t find_MSB(uint32_t v);
-/**
- * @brief Read single bit at index `j`
- *
- * @param bit_arr Pointer to BitArray
- * @param j Index
- * @return bit value
- */
-unsigned int BitArray_bitread(BitArray* bit_arr, unsigned int j);
+void bit_set(uint32_t *bit_arr, size_t size, size_t j);
+void bit_clear(uint32_t *bit_arr, size_t size, size_t j);
+void bit_write_range(
+ uint32_t *bit_arr, size_t w, unsigned int j1, unsigned int j, unsigned int x
+);
-/**
- * @brief Set bit at index `j`
- *
- * Sets the the bit = 1 at index `j`. If bit is already equal to 1 then nothing
- * happens.
- *
- * @param bit_arr Pointer to BitArray
- * @param j Index
- */
-void BitArray_bitset(BitArray* bit_arr, unsigned int j);
+unsigned bit_read(uint32_t *bit_arr, size_t size, size_t j);
-/**
- * @brief Clears bit at index `j`
- *
- * Sets the the bit = 0 at index `j`. If bit is already equal to 0 then nothing
- * happens.
- *
- * @param bit_arr Pointer to BitArray
- * @param j Index
- */
-void BitArray_bitclear(BitArray* bit_arr, unsigned int j);
-
-/**
- * @brief Reads range of bits B[j1, j]
- *
- * @param bit_arr
- * @param j1
- * @param j
- * @return Integer constructed from bits [j1, j]
- */
-unsigned int BitArray_bitsread(BitArray* bit_arr, unsigned int j1, unsigned int j);
-
-
-/**
- * @brief Get value from original array at index i
- *
- * The array held within a BitArray is a compact version of the original.
- * We can retrieve this original value by reading the bits from the range
- * [i * l, (i+1)*l-1] in the compact array.
- *
- * @param bit_arr
- * @param i
- * @return Value at A[i]
- */
-unsigned int BitArray_read(BitArray* bit_arr, unsigned int i);
-
-
-/**
- * @brief Write to range of bits
- *
- * Abstract function, most will want to use BitArray_write instead
- *
- * @param bit_arr Pointer to BitArray
- * @param j1 Starting index for virtual bit array
- * @param j Ending index for virtual bit array
- * @param x Integer to write
- */
-void BitArray_bitswrite(BitArray* bit_arr, unsigned int j1, unsigned int j, unsigned int x);
-
-
-
-/**
- * @brief Write value to compact bit representation of array
- *
- * @param bit_arr Pointer to BitArray
- * @param i Index in array to write
- * @param x Integer to write
- */
-void BitArray_write(BitArray* bit_arr, unsigned int i, unsigned int x);
+unsigned int bit_read_range(
+ uint32_t *bit_arr, size_t width, unsigned int j1, unsigned int j
+);
#endif // !BITOPS_H_
A src/common.h => src/common.h +12 -0
@@ 0,0 1,12 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+// Integer ceiling of x / y for x >= 1 and y > 0. Both arguments are
+// parenthesized so that expression arguments expand with the intended grouping.
+#define ceil_int(x, y) (1 + ((x) - 1) / (y))
+
+// Status codes shared by the bit array modules
+typedef enum {
+ BITARR_SUCCESS,
+ OUT_OF_BOUNDS, // Indexing error (passed to exit() by the bounds checks)
+ FILE_ERROR // I/O Error
+} BITARR_ERROR;
+
+#endif<
\ No newline at end of file
M src/encoding.c => src/encoding.c +2 -21
@@ 5,25 5,6 @@
#include "encoding.h"
-#include <stdint.h>
-#include <stdio.h>
-
-
-uint32_t count_trailing_zeros(unsigned int v)
-{
- // modified from:
- // http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
- static const uint32_t Mod37BitPosition[] = {
- 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4,
- 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, 5,
- 20, 8, 19, 18
- };
-
- // map a bit value mod 37 to its position
- return Mod37BitPosition[(-v & v) % 37];
-}
-
-
unsigned int unary_encode(uint32_t k)
{
// code 1 . 0 k times (e.g. 3 := 1 . 000
@@ 39,7 20,7 @@ unsigned int gamma_encode(uint32_t k)
length = (uint32_t) log2(k);
offset = k - (1 << length);
- // Unary coded offset . length
+ // Unary coded offset . length
// e.g. 13 (l = 3, o = 5) := 101.1000
return (offset << (length+1) | ((1 << length)));
}
@@ 49,7 30,7 @@ unsigned int gamma_decode(unsigned int k)
uint32_t l = 0;
uint32_t o = 0;
- l = count_trailing_zeros(k);
+ l = find_LSB(k); // idx of LSB set to 1
k >>= l;
// Mask for l bits
M src/encoding.h => src/encoding.h +1 -0
@@ 3,6 3,7 @@
#include <stdint.h>
#include <math.h>
+#include "bitops.h"
M tests/tests.c => tests/tests.c +39 -12
@@ 6,6 6,7 @@
#include "../src/bitops.h"
#include "../src/bitarr_io.h"
#include "../src/encoding.h"
+#include "../src/bitarr_vl.h"
@@ 15,9 16,10 @@ BEGIN_TESTING
// -- Data --------------------------------------------------------------------
unsigned int A[10] = { 20, 18, 22, 22, 16, 21, 11, 22, 21, 21 };
+unsigned int A_vl[10] = { 0, 1, 0, 2, 5, 1, 3, 2, 8, 2 };
/*
-* Both of binary representations of b have been flipped since when reading
+* Both of binary representations of b have been flipped since when reading
* individual bits the most from array A, the least significant bit will be read
* first from each int.
*
@@ 61,7 63,7 @@ TEST("single bit read")
{
unsigned int b;
for (unsigned int i = 0; i < 64; ++i) {
- b = BitArray_bitread(bit_arr, i);
+ b = bit_read(bit_arr->v, bit_arr->width, i);
assert(b == B_sig_ordered[i]);
}
printf("✔ bit read passed\n");
@@ 71,26 73,26 @@ TEST("bit set & clear")
{
unsigned int og_bit, nu_bit, idx;
idx = 2;
- og_bit = BitArray_bitread(bit_arr, 2);
-
+ og_bit = bit_read(bit_arr->v, bit_arr->width, idx);
+
// 1 -> 1
- BitArray_bitset(bit_arr, idx);
- nu_bit = BitArray_bitread(bit_arr, idx);
+ bit_set(bit_arr->v, bit_arr->width, idx);
+ nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
assert((og_bit & nu_bit) == 1);
// 1 -> 0
- BitArray_bitclear(bit_arr, idx);
- nu_bit = BitArray_bitread(bit_arr, idx);
+ bit_clear(bit_arr->v, bit_arr->width, idx);
+ nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
assert(nu_bit == 0);
// 0 -> 0
- BitArray_bitclear(bit_arr, idx);
- nu_bit = BitArray_bitread(bit_arr, idx);
+ bit_clear(bit_arr->v, bit_arr->width, idx);
+ nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
assert(nu_bit == 0);
// 0 -> 1 (back to original)
- BitArray_bitset(bit_arr, idx);
- nu_bit = BitArray_bitread(bit_arr, idx);
+ bit_set(bit_arr->v, bit_arr->width, idx);
+ nu_bit = bit_read(bit_arr->v, bit_arr->width, idx);
assert((og_bit & nu_bit) == 1);
printf("✔ bit set/clear passed\n");
}
@@ 144,6 146,31 @@ TEST("Gamma encoding")
printf("✔ Gamma coding\n");
}
+// Round-trips two arrays through the gamma-coded VLBitArray and checks the
+// packed words of the first one against known-good values.
+TEST("VL BitArray")
+{
+    uint32_t correct_W_vla[2] = { 415519957, 3 };
+    VLBitArray *vlb = VLBitArray_init(A_vl, 10, 4, sizeof(uint32_t));
+    assert(vlb != NULL);
+
+    // Mostly-zero input with k = 30, so a read may step over up to 30 codes
+    uint32_t AA[31] = {1, 100, 200, 11, 1, 50, 1000};
+    AA[20] = 20;
+    AA[30] = 10000;
+    VLBitArray *vlb1 = VLBitArray_init(AA, 31, 30, sizeof(uint32_t));
+    assert(vlb1 != NULL);
+
+    for (size_t i = 0; i < 2; i++) {
+        assert(correct_W_vla[i] == vlb->W[i]);
+    }
+    for (size_t i = 0; i < 10; ++i) assert(A_vl[i] == VLBitArray_read(vlb, i));
+
+    VLBitArray_free(vlb);
+
+    for (size_t i = 0; i < 31; ++i) {
+        assert(AA[i] == VLBitArray_read(vlb1, i));
+    }
+
+    // The second array was previously never released
+    VLBitArray_free(vlb1);
+
+    printf("✔ Variable Length BitArray\n");
+}
+