@@ 1,21 1,17 @@
(* Persistent Vectors are implement as a wide-branching tree. Here, the branch factor
- is chosen to be 32, which means at most 5 hops in the tree need to be made to find
- the element in question.
- TODO: How does this square with 32 bit OCaml only having 31 real bits available?
- *)
-let bits_per_level = 5
-let branch_factor = 1 lsl bits_per_level
-
-(* Paths down into the tree are indexed by the bits of the logical index, left to right,
+ is chosen to be 32.
+ Elements are stored only at the leaves.
+ Paths down into the tree are indexed by the bits of the logical index, left to right,
in groups of `bits_per_level`. Note that unused bits are kept on the left. That is,
if a given vector's size is small enough to be addressed with 15 bits, it is the rightmost
- 15 bits that are used, and all upper bits are unused.
+ 15 bits that are used, and the other upper bits are unused.
+ OCaml ints are 31 bits on 32-bit platforms, which allows us to store
+ at least 2^31 or 2.1 billion items.
*)
-
+let bits_per_level = 5
+let branch_factor = 1 lsl bits_per_level
(* inner_mask is used to access items within a single level of the tree *)
let inner_mask = branch_factor - 1
-(* outer_mask is used to hop between levels of the tree *)
-let outer_mask = lnot inner_mask
type 'a node = {
(* Note: This array is always [branch_factor]-sized. *)
@@ 35,69 31,100 @@ type 'a t = {
count : int;
(* The root of the tree *)
root : 'a node;
- (* Height of the tree, but in bits instead of logical levels.
- Storing it this way allows for the logic below to not have to
- constantly be multiplying by bits_per_level when doing bitwise operations.
- Empty vector has height `bits_per_level`, i.e. this value is never zero.
+ (* Height of the vector, but in bits instead of logical levels.
+ Put another way, the number of bits that can be used to address this vector without
+ changing the height of the tree.
+ Storing it as bits allows for update logic to not have to
+ constantly be multiplying by bits_per_level.
+ Note that this includes the bits used to index the last-level array, such that the
+ empty vector has height `bits_per_level` (0 bits to find the last-level array,
+ `bits_per_level` to index into the last-level array).
+ So this value is always a positive multiple of `bits_per_level`.
*)
height : int;
(* The "tail" is an optimization to the vector that allows true-constant time
access to the end of the vector, in recognition of the fact that vectors
- are often pushed and popped at the end.
+ are often pushed and popped at the end. The tail array is not stored in the tree.
Typed as 'a node_item to match node type, but is logically 'a option array.
i.e., this should never hold Inner, only Leaf or Empty
- Note: This array will grow and shrink as the vector is updated, unlike the array
- inside 'a node.
+ This array will grow and shrink as the vector is updated, unlike the array
+ inside 'a node, but it will always be nonempty (other than for the empty vector)
+ to guarantee true constant time reads to the last elements of the vector.
*)
tail : 'a node_item array;
}
+let empty_node_value = { array = Array.make branch_factor Empty }
let empty_value = {
count = 0;
height = bits_per_level;
- root = { array = Array.make branch_factor Empty };
+ root = empty_node_value;
tail = [||];
}
+(* XXX: Extremely ugly!!
+ If I just expose `empty_value` directly, I get typecheck errors.
+ I believe this is because the typechecker wants only one concrete type
+ parameter 'a, and I don't know how to convince it that I can share one empty_value
+ for all possible type parameters 'a.
+ I think it's because the arrays
+ are technically mutable, but we never mutate them, so it's okay.
+ Use ugly casts in the meantime.
+ *)
+let empty_node () : 'a node =
+ Obj.magic empty_node_value
let empty () : 'a t =
- (* XXX: Extremely ugly!!
- If I just expose `empty_value` directly, I get typecheck errors.
- I believe this is because the typechecker wants only one concrete type
- parameter 'a, and I don't know how to convince it that I can share one empty_value
- for all possible type parameters 'a.
- I think it's because the arrays in empty_value
- are technically mutable, but we never mutate them, so it's okay.
- Use the ugly cast in the meantime.
- *)
Obj.magic empty_value
+let is_node_empty (node : 'a node) : bool =
+ node == (empty_node ())
+ || Array.for_all
+ (function
+ | Empty -> true
+ | _ -> false)
+ node.array
+
let length (t : 'a t) : int = t.count
-(** Returns the logical index represented by tail.(0) *)
+(** Returns the logical index held by [tail.(0)] *)
let tail_offset (t : 'a t) : int =
let count = (length t) in
if count < branch_factor then
0
else
- (count - 1) land outer_mask
+ let last_index = count - 1 in
+ last_index land (lnot inner_mask)
-(** Returns the array holding the element at logical index i *)
-let array_for (t : 'a t) (i : int) : 'a node_item array option =
+(** Returns [Some array] where [array] is the array of leaves
+ holding the element at logical index [i], or [None] if the given index
+ [i] is not in bounds.
+ *)
+let array_for
+ ?(verbose : bool = false)
+ (t : 'a t) (i : int) : 'a node_item array option =
+ if verbose then
+ Printf.eprintf "array_for %d:" i;
if i < 0 || i >= (length t) then
None
else if i >= (tail_offset t) then
+ let () = if verbose then
+ Printf.eprintf " tail\n" in
Some t.tail
else
let rec descend node height =
- if height <= 0 then
+ if Int.equal 0 height then
+ let () = if verbose then Printf.eprintf "\n" in
node.array
else
let idx = (i lsr height) land inner_mask in
+ let () = if verbose then
+ Printf.eprintf " -> %d" idx in
match node.array.(idx) with
| Inner node -> descend node (height - bits_per_level)
- | _ -> failwith "Invariant violation, should only have inner nodes in array_for"
+ | Leaf _
+ | Empty -> failwith "Invariant violation, should only have inner nodes in array_for"
in
Some (descend t.root t.height)
@@ 118,33 145,39 @@ let nth (t : 'a t) (index : int) : 'a =
let append (t : 'a t) (value : 'a) : 'a t =
let old_count = length t in
+ (* TODO make this logic more readable (there are a couple shortcuts being taken here *)
if old_count - (tail_offset t) < branch_factor then
- (* Create a new tail that is one longer *)
+ (* Can expand tail "in-place" without touching tree *)
let old_tail_length = Array.length t.tail in
let new_tail = Array.make (old_tail_length + 1) (Leaf value) in
Array.blit t.tail 0 new_tail 0 old_tail_length;
{ t with tail = new_tail; count = old_count + 1 }
else
(* No more room in tail, insert the tail into the tree and make a new tail *)
- let rec create_hierarchy height node =
+ (* Creates a path of Inner nodes [height] deep and places [node] there, returning
+ the parent of the new subtree. *)
+ let rec create_subtree height node =
if Int.equal 0 height then
node
else
let new_node = { array = Array.make branch_factor Empty } in
- let child = create_hierarchy (height - bits_per_level) node in
+ let child = create_subtree (height - bits_per_level) node in
new_node.array.(0) <- Inner child;
new_node
in
- let rec push_tail height parent tail =
+ (* Appends [node] at [height] depth into the subtree rooted at [parent],
+ copying the path and creating new subtrees where necessary along the way.
+ Returns the new version of [parent]. *)
+ let rec append_node height parent node =
let sub_index = ((old_count - 1) lsr height) land inner_mask in
let result = { array = Array.copy parent.array } in
let to_insert =
if Int.equal height bits_per_level then
- tail
+ node
else
match parent.array.(sub_index) with
- | Inner child -> push_tail (height - bits_per_level) child tail
- | Empty -> create_hierarchy (height - bits_per_level) tail
+ | Inner child -> append_node (height - bits_per_level) child node
+ | Empty -> create_subtree (height - bits_per_level) node
| Leaf _ -> failwith "Should not see leaves here"
in
result.array.(sub_index) <- Inner to_insert;
@@ 154,13 187,13 @@ let append (t : 'a t) (value : 'a) : 'a t =
let (root, height) =
if (old_count lsr bits_per_level) <= (1 lsl t.height) then
(* Tree doesn't need to grow taller *)
- let new_root = push_tail t.height t.root node_to_insert in
+ let new_root = append_node t.height t.root node_to_insert in
new_root, t.height
else
(* Tree needs to grow taller *)
let new_root = { array = Array.make branch_factor Empty } in
new_root.array.(0) <- Inner t.root;
- new_root.array.(1) <- Inner (create_hierarchy t.height node_to_insert);
+ new_root.array.(1) <- Inner (create_subtree t.height node_to_insert);
new_root, t.height + bits_per_level
in
{
@@ 206,13 239,65 @@ let pop_opt (t : 'a t) : 'a t option =
| 0 -> None
| 1 -> Some (empty ())
| count ->
- (* TODO: handle tree traversal, tail promotion *)
- if (count - 1) >= (tail_offset t) then
+ let last_index = count - 1 in
+ if last_index > (tail_offset t) then
(* Shrink tail *)
+ let () = Printf.eprintf "last_index %d, tail_offset %d, tail length %d\n"
+ last_index
+ (tail_offset t)
+ (Array.length t.tail)
+ in
let tail = Array.sub t.tail 0 (Array.length t.tail - 1) in
- Some { t with tail; count = t.count - 1 }
+ Some { t with tail; count = count - 1 }
else
- Some t
+ (* Popped last element of tail,
+ go into tree and pluck out an array to be the new tail.
+ *)
+ let new_last_index = last_index - 1 in
+ let array = array_for ~verbose:true t new_last_index |> Option.get in
+ (* Resize the array to be snug *)
+ let new_tail_len =
+ let rec f index =
+ match array.(index) with
+ | Leaf _ -> index + 1
+ | Empty -> f (index + 1)
+ | Inner _ -> failwith "Invariant violation, not expecting Inner in last-level array"
+ in
+ f 0
+ in
+ let new_tail = Array.sub array 0 new_tail_len in
+ (* Remove the new tail from the subtree rooted at [parent]
+ Returns the new version of [parent]
+ Inverse of [append_node] in [append]. *)
+ let rec remove parent height =
+ let child_index = (new_last_index lsr height) land inner_mask in
+ if Int.equal bits_per_level height then
+ let new_node = { array = Array.copy parent.array } in
+ let () = new_node.array.(child_index) <- Empty in
+ new_node
+ else
+ let old_child = match parent.array.(child_index) with
+ | Inner node -> node
+ | Leaf _ | Empty -> failwith "Invariant violation, only expecting Inner"
+ in
+ let child = remove old_child (height - bits_per_level) in
+ let new_node = { array = Array.copy parent.array } in
+ let () = new_node.array.(child_index) <-
+ if is_node_empty child then Empty else Inner child
+ in
+ if is_node_empty new_node then
+ empty_node ()
+ else
+ new_node
+ in
+ let new_root = remove t.root t.height in
+ (* Handle the case where the root is now redundant *)
+ let new_root =
+ match new_root.array.(0), new_root.array.(1) with
+ | Inner inner, Empty -> inner
+ | _ -> new_root
+ in
+ Some { t with root = new_root; tail = new_tail; count = count - 1; }
let pop (t : 'a t) : 'a t =
match pop_opt t with
@@ 6,19 6,33 @@ module Vector = Persistent.Vector
let test_insert_seq_sanity () =
let num = 1 lsl 20 in
let v = Seq.init num Fun.id |> Vector.of_seq in
- let expected = List.of_seq (Seq.init num Fun.id) in
+ let expected = List.init num Fun.id in
let actual = List.of_seq (Vector.to_seq v) in
assert (List.equal Int.equal actual expected)
-(* Tests that the vector is actually persistent, in that previous values are still usable
- and have the same value after performing updates
- *)
+let test_insert_remove () =
+ let num = 1 lsl 14 in
+ let v = Seq.init num Fun.id |> Vector.of_seq in
+ let f (acc : int Vector.t) (i : int) : int Vector.t =
+ Printf.eprintf "Iteration %d\n" i;
+ let next = Vector.pop acc in
+ (* let actual = List.of_seq (Vector.to_seq v) in
+ let expected = List.init (num - i - 1) Fun.id in
+ assert (List.equal Int.equal actual expected); *)
+ next
+ in
+ let v =
+ List.fold_left f v (List.init num Fun.id)
+ in
+ assert (Int.equal 0 (Vector.length v))
let all_tests = [
- ("Insert and Seq Sanity", test_insert_seq_sanity)
+ ("Insert and Seq Sanity", test_insert_seq_sanity);
+ ("Insert and Remove", test_insert_remove)
]
let main () =
+ Printexc.record_backtrace true;
List.iter
(fun (name, test_function) ->
Printf.eprintf "Running test: %s..." name;