Removal but not working yet

2 files changed,152insertions(+),53deletions(-) M lib/vector.ml M test/vectorTest.ml

M lib/vector.ml => lib/vector.ml +133 -48

@@ 1,21 1,17 @@(* Persistent Vectors are implement as a wide-branching tree. Here, the branch factor is chosen to be 32, which means at most 5 hops in the tree need to be made to find the element in question. TODO: How does this square with 32 bit OCaml only having 31 real bits available? *) let bits_per_level = 5 let branch_factor = 1 lsl bits_per_level (* Paths down into the tree are indexed by the bits of the logical index, left to right, is chosen to be 32. Elements are stored only at the leaves. Paths down into the tree are indexed by the bits of the logical index, left to right, in groups of `bits_per_level`. Note that unused bits are kept on the left. That is, if a given vector's size is small enough to be addressed with 15 bits, it is the rightmost 15 bits that are used, and all upper bits are unused. 15 bits that are used, and the other upper bits are unused. OCaml ints are 31 bits on 32-bit platforms, which allows us to store at least 2^31 or 2.1 billion items. *) let bits_per_level = 5 let branch_factor = 1 lsl bits_per_level (* inner_mask is used to access items within a single level of the tree *) let inner_mask = branch_factor - 1 (* outer_mask is used to hop between levels of the tree *) let outer_mask = lnot inner_mask type 'a node = { (* Note: This array is always [branch_factor]-sized. *)@@ 35,69 31,100 @@ type 'a t = {count : int; (* The root of the tree *) root : 'a node; (* Height of the tree, but in bits instead of logical levels. Storing it this way allows for the logic below to not have to constantly be multiplying by bits_per_level when doing bitwise operations. Empty vector has height `bits_per_level`, i.e. this value is never zero. (* Height of the vector, but in bits instead of logical levels. Put another way, the number of bits that can be used to address this vector without changing the height of the tree. Storing it as bits allows for update logic to not have to constantly be multiplying by bits_per_level. Note that this includes the bits used to index the last-level array, such that the empty vector has height `bits_per_level` (0 bits to find the last-level array, `bits_per_level` to index into the last-level array). So this value is always a positive multiple of `bits_per_level`. *) height : int; (* The "tail" is an optimization to the vector that allows true-constant time access to the end of the vector, in recognition of the fact that vectors are often pushed and popped at the end. are often pushed and popped at the end. The tail array is not stored in the tree. Typed as 'a node_item to match node type, but is logically 'a option array. i.e., this should never hold Inner, only Leaf or Empty Note: This array will grow and shrink as the vector is updated, unlike the array inside 'a node. This array will grow and shrink as the vector is updated, unlike the array inside 'a node, but it will always be nonempty (other than for the empty vector) to guarantee true constant time reads to the last elements of the vector. *) tail : 'a node_item array; } let empty_node_value = { array = Array.make branch_factor Empty } let empty_value = { count = 0; height = bits_per_level; root = { array = Array.make branch_factor Empty }; root = empty_node_value; tail = [||]; } (* XXX: Extremely ugly!! If I just expose `empty_value` directly, I get typecheck errors. I believe this is because the typechecker wants only one concrete type parameter 'a, and I don't know how to convince it that I can share one empty_value for all possible type parameters 'a. I think it's because the arrays are technically mutable, but we never mutate them, so it's okay. Use ugly casts in the meantime. *) let empty_node () : 'a node = Obj.magic empty_node_value let empty () : 'a t = (* XXX: Extremely ugly!! If I just expose `empty_value` directly, I get typecheck errors. I believe this is because the typechecker wants only one concrete type parameter 'a, and I don't know how to convince it that I can share one empty_value for all possible type parameters 'a. I think it's because the arrays in empty_value are technically mutable, but we never mutate them, so it's okay. Use the ugly cast in the meantime. *) Obj.magic empty_value let is_node_empty (node : 'a node) : bool = node == (empty_node ()) || Array.for_all (function | Empty -> true | _ -> false) node.array let length (t : 'a t) : int = t.count (** Returns the logical index represented by tail.(0) *) (** Returns the logical index held by [tail.(0)] *) let tail_offset (t : 'a t) : int = let count = (length t) in if count < branch_factor then 0 else (count - 1) land outer_mask let last_index = count - 1 in last_index land (lnot inner_mask) (** Returns the array holding the element at logical index i *) let array_for (t : 'a t) (i : int) : 'a node_item array option = (** Returns [Some array] where [array] is the array of leaves holding the element at logical index [i], or [None] if the given index [i] is not in bounds. *) let array_for ?(verbose : bool = false) (t : 'a t) (i : int) : 'a node_item array option = if verbose then Printf.eprintf "array_for %d:" i; if i < 0 || i >= (length t) then None else if i >= (tail_offset t) then let () = if verbose then Printf.eprintf " tail\n" in Some t.tail else let rec descend node height = if height <= 0 then if Int.equal 0 height then let () = if verbose then Printf.eprintf "\n" in node.array else let idx = (i lsr height) land inner_mask in let () = if verbose then Printf.eprintf " -> %d" idx in match node.array.(idx) with | Inner node -> descend node (height - bits_per_level) | _ -> failwith "Invariant violation, should only have inner nodes in array_for" | Leaf _ | Empty -> failwith "Invariant violation, should only have inner nodes in array_for" in Some (descend t.root t.height)@@ 118,33 145,39 @@ let nth (t : 'a t) (index : int) : 'a =let append (t : 'a t) (value : 'a) : 'a t = let old_count = length t in (* TODO make this logic more readable (there are a couple shortcuts being taken here *) if old_count - (tail_offset t) < branch_factor then (* Create a new tail that is one longer *) (* Can expand tail "in-place" without touching tree *) let old_tail_length = Array.length t.tail in let new_tail = Array.make (old_tail_length + 1) (Leaf value) in Array.blit t.tail 0 new_tail 0 old_tail_length; { t with tail = new_tail; count = old_count + 1 } else (* No more room in tail, insert the tail into the tree and make a new tail *) let rec create_hierarchy height node = (* Creates a path of Inner nodes [height] deep and places [node] there, returning the parent of the new subtree. *) let rec create_subtree height node = if Int.equal 0 height then node else let new_node = { array = Array.make branch_factor Empty } in let child = create_hierarchy (height - bits_per_level) node in let child = create_subtree (height - bits_per_level) node in new_node.array.(0) <- Inner child; new_node in let rec push_tail height parent tail = (* Appends [node] at [height] depth into the subtree rooted at [parent], copying the path and creating new subtrees where necessary along the way. Returns the new version of [parent]. *) let rec append_node height parent node = let sub_index = ((old_count - 1) lsr height) land inner_mask in let result = { array = Array.copy parent.array } in let to_insert = if Int.equal height bits_per_level then tail node else match parent.array.(sub_index) with | Inner child -> push_tail (height - bits_per_level) child tail | Empty -> create_hierarchy (height - bits_per_level) tail | Inner child -> append_node (height - bits_per_level) child node | Empty -> create_subtree (height - bits_per_level) node | Leaf _ -> failwith "Should not see leaves here" in result.array.(sub_index) <- Inner to_insert;@@ 154,13 187,13 @@ let append (t : 'a t) (value : 'a) : 'a t =let (root, height) = if (old_count lsr bits_per_level) <= (1 lsl t.height) then (* Tree doesn't need to grow taller *) let new_root = push_tail t.height t.root node_to_insert in let new_root = append_node t.height t.root node_to_insert in new_root, t.height else (* Tree needs to grow taller *) let new_root = { array = Array.make branch_factor Empty } in new_root.array.(0) <- Inner t.root; new_root.array.(1) <- Inner (create_hierarchy t.height node_to_insert); new_root.array.(1) <- Inner (create_subtree t.height node_to_insert); new_root, t.height + bits_per_level in {@@ 206,13 239,65 @@ let pop_opt (t : 'a t) : 'a t option =| 0 -> None | 1 -> Some (empty ()) | count -> (* TODO: handle tree traversal, tail promotion *) if (count - 1) >= (tail_offset t) then let last_index = count - 1 in if last_index > (tail_offset t) then (* Shrink tail *) let () = Printf.eprintf "last_index %d, tail_offset %d, tail length %d\n" last_index (tail_offset t) (Array.length t.tail) in let tail = Array.sub t.tail 0 (Array.length t.tail - 1) in Some { t with tail; count = t.count - 1 } Some { t with tail; count = count - 1 } else Some t (* Popped last element of tail, go into tree and pluck out an array to be the new tail. *) let new_last_index = last_index - 1 in let array = array_for ~verbose:true t new_last_index |> Option.get in (* Resize the array to be snug *) let new_tail_len = let rec f index = match array.(index) with | Leaf _ -> index + 1 | Empty -> f (index + 1) | Inner _ -> failwith "Invariant violation, not expecting Inner in last-level array" in f 0 in let new_tail = Array.sub array 0 new_tail_len in (* Remove the new tail from the subtree rooted at [parent] Returns the new version of [parent] Inverse of [append_node] in [append]. *) let rec remove parent height = let child_index = (new_last_index lsr height) land inner_mask in if Int.equal bits_per_level height then let new_node = { array = Array.copy parent.array } in let () = new_node.array.(child_index) <- Empty in new_node else let old_child = match parent.array.(child_index) with | Inner node -> node | Leaf _ | Empty -> failwith "Invariant violation, only expecting Inner" in let child = remove old_child (height - bits_per_level) in let new_node = { array = Array.copy parent.array } in let () = new_node.array.(child_index) <- if is_node_empty child then Empty else Inner child in if is_node_empty new_node then empty_node () else new_node in let new_root = remove t.root t.height in (* Handle the case where the root is now redundant *) let new_root = match new_root.array.(0), new_root.array.(1) with | Inner inner, Empty -> inner | _ -> new_root in Some { t with root = new_root; tail = new_tail; count = count - 1; } let pop (t : 'a t) : 'a t = match pop_opt t with

M test/vectorTest.ml => test/vectorTest.ml +19 -5

@@ 6,19 6,33 @@ module Vector = Persistent.Vectorlet test_insert_seq_sanity () = let num = 1 lsl 20 in let v = Seq.init num Fun.id |> Vector.of_seq in let expected = List.of_seq (Seq.init num Fun.id) in let expected = List.init num Fun.id in let actual = List.of_seq (Vector.to_seq v) in assert (List.equal Int.equal actual expected) (* Tests that the vector is actually persistent, in that previous values are still usable and have the same value after performing updates *) let test_insert_remove () = let num = 1 lsl 14 in let v = Seq.init num Fun.id |> Vector.of_seq in let f (acc : int Vector.t) (i : int) : int Vector.t = Printf.eprintf "Iteration %d\n" i; let next = Vector.pop acc in (* let actual = List.of_seq (Vector.to_seq v) in let expected = List.init (num - i - 1) Fun.id in assert (List.equal Int.equal actual expected); *) next in let v = List.fold_left f v (List.init num Fun.id) in assert (Int.equal 0 (Vector.length v)) let all_tests = [ ("Insert and Seq Sanity", test_insert_seq_sanity) ("Insert and Seq Sanity", test_insert_seq_sanity); ("Insert and Remove", test_insert_remove) ] let main () = Printexc.record_backtrace true; List.iter (fun (name, test_function) -> Printf.eprintf "Running test: %s..." name;