~williewillus/persistent

253ea25c8fbef8c94492e665b34656bcafd5cf08 — Vincent Lee 1 year, 7 months ago e830ba3
Removal but not working yet
2 files changed, 152 insertions(+), 53 deletions(-)

M lib/vector.ml
M test/vectorTest.ml
M lib/vector.ml => lib/vector.ml +133 -48
@@ 1,21 1,17 @@
(* Persistent Vectors are implement as a wide-branching tree. Here, the branch factor
   is chosen to be 32, which means at most 5 hops in the tree need to be made to find
   the element in question.
   TODO: How does this square with 32 bit OCaml only having 31 real bits available?
 *)
let bits_per_level = 5
let branch_factor = 1 lsl bits_per_level

(* Paths down into the tree are indexed by the bits of the logical index, left to right,
   is chosen to be 32.
   Elements are stored only at the leaves.
   Paths down into the tree are indexed by the bits of the logical index, left to right,
   in groups of `bits_per_level`. Note that unused bits are kept on the left. That is,
   if a given vector's size is small enough to be addressed with 15 bits, it is the rightmost
   15 bits that are used, and all upper bits are unused.
   15 bits that are used, and the other upper bits are unused.
   OCaml ints are 31 bits on 32-bit platforms, which allows us to store
   at least 2^31 or 2.1 billion items.
 *)

let bits_per_level = 5
let branch_factor = 1 lsl bits_per_level
(* inner_mask is used to access items within a single level of the tree *)
let inner_mask = branch_factor - 1
(* outer_mask is used to hop between levels of the tree *)
let outer_mask = lnot inner_mask

type 'a node = {
    (* Note: This array is always [branch_factor]-sized. *)


@@ 35,69 31,100 @@ type 'a t = {
    count : int;
    (* The root of the tree *)
    root : 'a node;
    (* Height of the tree, but in bits instead of logical levels.
       Storing it this way allows for the logic below to not have to
       constantly be multiplying by bits_per_level when doing bitwise operations.
       Empty vector has height `bits_per_level`, i.e. this value is never zero.
    (* Height of the vector, but in bits instead of logical levels.
       Put another way, the number of bits that can be used to address this vector without
       changing the height of the tree.
       Storing it as bits allows for update logic to not have to
       constantly be multiplying by bits_per_level.
       Note that this includes the bits used to index the last-level array, such that the
       empty vector has height `bits_per_level` (0 bits to find the last-level array,
       `bits_per_level` to index into the last-level array).
       So this value is always a positive multiple of `bits_per_level`.
     *)
    height : int;
    (* The "tail" is an optimization to the vector that allows true-constant time
       access to the end of the vector, in recognition of the fact that vectors
       are often pushed and popped at the end.
       are often pushed and popped at the end. The tail array is not stored in the tree.
       
       Typed as 'a node_item to match node type, but is logically 'a option array.
       i.e., this should never hold Inner, only Leaf or Empty
       
       Note: This array will grow and shrink as the vector is updated, unlike the array
       inside 'a node.
       This array will grow and shrink as the vector is updated, unlike the array
       inside 'a node, but it will always be nonempty (other than for the empty vector)
       to guarantee true constant time reads to the last elements of the vector.
     *)
    tail : 'a node_item array;
  }

let empty_node_value = { array = Array.make branch_factor Empty }
let empty_value = {
    count = 0;
    height = bits_per_level;
    root = { array = Array.make branch_factor Empty };
    root = empty_node_value;
    tail = [||];
  }

(* XXX: Extremely ugly!!
   If I just expose `empty_value` directly, I get typecheck errors.
   I believe this is because the typechecker wants only one concrete type
   parameter 'a, and I don't know how to convince it that I can share one empty_value
   for all possible type parameters 'a.
   I think it's because the arrays
   are technically mutable, but we never mutate them, so it's okay.
   Use ugly casts in the meantime.
 *)
let empty_node () : 'a node =
  Obj.magic empty_node_value
let empty () : 'a t  =
  (* XXX: Extremely ugly!!
     If I just expose `empty_value` directly, I get typecheck errors.
     I believe this is because the typechecker wants only one concrete type
     parameter 'a, and I don't know how to convince it that I can share one empty_value
     for all possible type parameters 'a.
     I think it's because the arrays in empty_value
     are technically mutable, but we never mutate them, so it's okay.
     Use the ugly cast in the meantime.
   *)
  Obj.magic empty_value

let is_node_empty (node : 'a node) : bool =
  node == (empty_node ())
  || Array.for_all
       (function
        | Empty -> true
        | _ -> false)
       node.array

let length (t : 'a t) : int = t.count

(** Returns the logical index represented by tail.(0) *)
(** Returns the logical index held by [tail.(0)] *)
let tail_offset (t : 'a t) : int =
  let count = (length t) in
  if count < branch_factor then
    0
  else
    (count - 1) land outer_mask
    let last_index = count - 1 in
    last_index land (lnot inner_mask)

(** Returns the array holding the element at logical index i *)
let array_for (t : 'a t) (i : int) : 'a node_item array option =
(** Returns [Some array] where [array] is the array of leaves
    holding the element at logical index [i], or [None] if the given index
    [i] is not in bounds.
 *)
let array_for
      ?(verbose : bool = false)
      (t : 'a t) (i : int) : 'a node_item array option =
  if verbose then
    Printf.eprintf "array_for %d:" i;
  if i < 0 || i >= (length t) then
    None
  else if i >= (tail_offset t) then
    let () = if verbose then
               Printf.eprintf " tail\n" in
    Some t.tail
  else
    let rec descend node height =
      if height <= 0 then
      if Int.equal 0 height then
        let () = if verbose then Printf.eprintf "\n" in
        node.array
      else
        let idx = (i lsr height) land inner_mask in
        let () = if verbose then
                   Printf.eprintf " -> %d" idx in
        match node.array.(idx) with
        | Inner node -> descend node (height - bits_per_level)
        | _ -> failwith "Invariant violation, should only have inner nodes in array_for"
        | Leaf _
          | Empty -> failwith "Invariant violation, should only have inner nodes in array_for"
    in
    Some (descend t.root t.height)



@@ 118,33 145,39 @@ let nth (t : 'a t) (index : int) : 'a =

let append (t : 'a t) (value : 'a) : 'a t =
  let old_count = length t in
  (* TODO make this logic more readable (there are a couple shortcuts being taken here *)
  if old_count - (tail_offset t) < branch_factor then
    (* Create a new tail that is one longer *)
    (* Can expand tail "in-place" without touching tree *)
    let old_tail_length = Array.length t.tail in
    let new_tail = Array.make (old_tail_length + 1) (Leaf value) in
    Array.blit t.tail 0 new_tail 0 old_tail_length;
    { t with tail = new_tail; count = old_count + 1 }
  else
    (* No more room in tail, insert the tail into the tree and make a new tail *)
    let rec create_hierarchy height node =
    (* Creates a path of Inner nodes [height] deep and places [node] there, returning
       the parent of the new subtree. *)
    let rec create_subtree height node =
      if Int.equal 0 height then
        node
      else
        let new_node = { array = Array.make branch_factor Empty } in
        let child = create_hierarchy (height - bits_per_level) node in
        let child = create_subtree (height - bits_per_level) node in
        new_node.array.(0) <- Inner child;
        new_node
    in
    let rec push_tail height parent tail =
    (* Appends [node] at [height] depth into the subtree rooted at [parent],
       copying the path and creating new subtrees where necessary along the way.
       Returns the new version of [parent]. *)
    let rec append_node height parent node =
      let sub_index = ((old_count - 1) lsr height) land inner_mask in
      let result = { array = Array.copy parent.array } in
      let to_insert =
        if Int.equal height bits_per_level then
          tail
          node
        else
          match parent.array.(sub_index) with
          | Inner child -> push_tail (height - bits_per_level) child tail
          | Empty -> create_hierarchy (height - bits_per_level) tail
          | Inner child -> append_node (height - bits_per_level) child node
          | Empty -> create_subtree (height - bits_per_level) node
          | Leaf _ -> failwith "Should not see leaves here"
      in
      result.array.(sub_index) <- Inner to_insert;


@@ 154,13 187,13 @@ let append (t : 'a t) (value : 'a) : 'a t =
    let (root, height) =
      if (old_count lsr bits_per_level) <= (1 lsl t.height) then
        (* Tree doesn't need to grow taller *)
        let new_root = push_tail t.height t.root node_to_insert in
        let new_root = append_node t.height t.root node_to_insert in
        new_root, t.height
      else
        (* Tree needs to grow taller *)
        let new_root = { array = Array.make branch_factor Empty } in
        new_root.array.(0) <- Inner t.root;
        new_root.array.(1) <- Inner (create_hierarchy t.height node_to_insert);
        new_root.array.(1) <- Inner (create_subtree t.height node_to_insert);
        new_root, t.height + bits_per_level
    in
    {


@@ 206,13 239,65 @@ let pop_opt (t : 'a t) : 'a t option =
  | 0 -> None
  | 1 -> Some (empty ())
  | count ->
     (* TODO: handle tree traversal, tail promotion *)
     if (count - 1) >= (tail_offset t) then
     let last_index = count - 1 in
     if last_index > (tail_offset t) then
       (* Shrink tail *)
       let () = Printf.eprintf "last_index %d, tail_offset %d, tail length %d\n"
                  last_index
                  (tail_offset t)
                  (Array.length t.tail)
       in
       let tail = Array.sub t.tail 0 (Array.length t.tail - 1) in
       Some { t with tail; count = t.count - 1 }
       Some { t with tail; count = count - 1 }
     else
       Some t
       (* Popped last element of tail,
          go into tree and pluck out an array to be the new tail.
        *)
       let new_last_index = last_index - 1 in
       let array = array_for ~verbose:true t new_last_index |> Option.get in
       (* Resize the array to be snug *)
       let new_tail_len =
         let rec f index =
           match array.(index) with
           | Leaf _ -> index + 1
           | Empty -> f (index + 1)
           | Inner _ -> failwith "Invariant violation, not expecting Inner in last-level array"
         in
         f 0
       in
       let new_tail = Array.sub array 0 new_tail_len in
       (* Remove the new tail from the subtree rooted at [parent]
          Returns the new version of [parent]
          Inverse of [append_node] in [append]. *)
       let rec remove parent height =
         let child_index = (new_last_index lsr height) land inner_mask in
         if Int.equal bits_per_level height then
           let new_node = { array = Array.copy parent.array } in
           let () = new_node.array.(child_index) <- Empty in
           new_node
         else
           let old_child = match parent.array.(child_index) with
             | Inner node -> node
             | Leaf _ | Empty -> failwith "Invariant violation, only expecting Inner"
           in
           let child = remove old_child (height - bits_per_level) in
           let new_node = { array = Array.copy parent.array } in
           let () = new_node.array.(child_index) <-
                      if is_node_empty child then Empty else Inner child
           in
           if is_node_empty new_node then
             empty_node ()
           else
             new_node
       in
       let new_root = remove t.root t.height in
       (* Handle the case where the root is now redundant *)
       let new_root =
         match new_root.array.(0), new_root.array.(1) with
         | Inner inner, Empty -> inner
         | _ -> new_root
       in
       Some { t with root = new_root; tail = new_tail; count = count - 1; }

let pop (t : 'a t) : 'a t =
  match pop_opt t with

M test/vectorTest.ml => test/vectorTest.ml +19 -5
@@ 6,19 6,33 @@ module Vector = Persistent.Vector
let test_insert_seq_sanity () =
  let num = 1 lsl 20 in
  let v = Seq.init num Fun.id |> Vector.of_seq in
  let expected = List.of_seq (Seq.init num Fun.id) in
  let expected = List.init num Fun.id in
  let actual = List.of_seq (Vector.to_seq v) in
  assert (List.equal Int.equal actual expected)

(* Tests that the vector is actually persistent, in that previous values are still usable
   and have the same value after performing updates
 *)
let test_insert_remove () =
  let num = 1 lsl 14 in
  let v = Seq.init num Fun.id |> Vector.of_seq in
  let f (acc : int Vector.t) (i : int) : int Vector.t =
    Printf.eprintf "Iteration %d\n" i;
    let next = Vector.pop acc in
    (* let actual = List.of_seq (Vector.to_seq v) in
    let expected = List.init (num - i - 1) Fun.id in
    assert (List.equal Int.equal actual expected); *)
    next
  in
  let v =
    List.fold_left f v (List.init num Fun.id)
  in
  assert (Int.equal 0 (Vector.length v))

let all_tests = [
    ("Insert and Seq Sanity", test_insert_seq_sanity)
    ("Insert and Seq Sanity", test_insert_seq_sanity);
    ("Insert and Remove", test_insert_remove)
  ]

let main () =
  Printexc.record_backtrace true;
  List.iter
    (fun (name, test_function) ->
      Printf.eprintf "Running test: %s..." name;