A => .gitignore +1 -0
A => dune-project +26 -0
@@ 1,26 @@
+(lang dune 3.7)
+
+(name persistent)
+
+(generate_opam_files true)
+
+(source
+ (github username/reponame))
+
+(authors "Vincent Lee")
+
+(maintainers "Vincent Lee")
+
+(license LICENSE)
+
+(documentation https://url/to/documentation)
+
+(package
+ (name persistent)
+ (synopsis "Clojure-style persistent data structures, sequences, and transducers for OCaml")
+ (description "A longer description")
+ (depends ocaml dune)
+ (tags
+ (topics "to describe" your project)))
+
+; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project
A => lib/dune +3 -0
@@ 1,3 @@
+(library
+ (public_name persistent)
+ (name persistent))
A => lib/vector.ml +213 -0
@@ 1,213 @@
+(* Persistent Vectors are implement as a wide-branching tree. Here, the branch factor
+ is chosen to be 32, which means at most 5 hops in the tree need to be made to find
+ the element in question.
+ TODO: How does this square with 32 bit OCaml only having 31 real bits available?
+ *)
+let bits_per_level = 5
+let branch_factor = 1 lsl bits_per_level
+
+(* Paths down into the tree are indexed by the bits of the logical index, left to right,
+ in groups of `bits_per_level`. Note that unused bits are kept on the left. That is,
+ if a given vector's size is small enough to be addressed with 15 bits, it is the rightmost
+ 15 bits that are used, and all upper bits are unused.
+ *)
+
+(* inner_mask is used to access items within a single level of the tree *)
+let inner_mask = branch_factor - 1
+(* outer_mask is used to hop between levels of the tree *)
+let outer_mask = lnot inner_mask
+
+type 'a node = {
+ array : 'a node_item array;
+ } [@@unboxed]
+and 'a node_item =
+ (* Actual data to be stored *)
+ | Leaf of 'a
+ (* Continue downwards into the tree *)
+ | Inner of 'a node
+ (* Nothing along this path *)
+ | Empty
+
+(* TODO: Figure out if I can make this covariant (see map.ml) *)
+type 'a t = {
+ (* The number of elements in the vector *)
+ count : int;
+ (* The root of the tree *)
+ root : 'a node;
+ (* Height of the tree, but in bits instead of logical levels.
+ Storing it this way allows for the logic below to not have to
+ constantly be multiplying by bits_per_level when doing bitwise operations.
+ Empty vector has height `bits_per_level`, i.e. this value is never zero.
+ *)
+ height : int;
+ (* The "tail" is an optimization to the vector that allows true-constant time
+ access to the end of the vector, in recognition of the fact that vectors
+ are often pushed and popped at the end.
+ Typed as 'a node_item to match node type, but is logically 'a option array.
+ i.e., this should never hold Inner, only Leaf or Empty *)
+ tail : 'a node_item array;
+ }
+
+let empty_value = {
+ count = 0;
+ height = bits_per_level;
+ root = { array = Array.make branch_factor Empty };
+ tail = [||];
+ }
+
+let empty () : 'a t =
+ (* XXX: Extremely ugly!!
+ If I just expose `empty_value` directly, I get typecheck errors.
+ I believe this is because the typechecker wants only one concrete type
+ parameter 'a, and I don't know how to convince it that I can share one empty_value
+ for all possible type parameters 'a.
+ I think it's because the arrays in empty_value
+ are technically mutable, but we never mutate them, so it's okay.
+ Use the ugly cast in the meantime.
+ *)
+ Obj.magic empty_value
+
+(** Returns the length of the vector *)
+let length (t : 'a t) : int = t.count
+
+(** Returns the logical index represented by tail.(0) *)
+let tail_offset (t : 'a t) : int =
+ let count = (length t) in
+ if count < branch_factor then
+ 0
+ else
+ (count - 1) land outer_mask
+
+(** Returns the array holding the element at logical index i *)
+let array_for (t : 'a t) (i : int) : 'a node_item array option =
+ if i < 0 || i >= (length t) then
+ None
+ else if i >= (tail_offset t) then
+ Some t.tail
+ else
+ let rec descend node height =
+ if height <= 0 then
+ node.array
+ else
+ let idx = (i lsr height) land inner_mask in
+ match node.array.(idx) with
+ | Inner node -> descend node (height - bits_per_level)
+ | _ -> failwith "Invariant violation, should only have inner nodes in array_for"
+ in
+ Some (descend t.root t.height)
+
+(** Gets the element at index, or None if out of bounds *)
+let nth_opt (t : 'a t) (index : int) : 'a option =
+ let last_level_index = index land inner_mask in
+ match array_for t index with
+ | Some array when last_level_index < (Array.length array) ->
+ (match array.(last_level_index) with
+ | Leaf value -> Some value
+ | Empty -> None
+ | Inner _ -> failwith "Invariant violation, not expecting Inner in last-level array")
+ | _ -> None
+
+(** Gets the element at index, or raises Invalid_argument if out of bounds *)
+let nth (t : 'a t) (index : int) : 'a =
+ match nth_opt t index with
+ | Some value -> value
+ | None -> invalid_arg "Index out of bounds"
+
+(** Appends an element to the vector *)
+let append (t : 'a t) (value : 'a) : 'a t =
+ let old_count = length t in
+ if old_count - (tail_offset t) < branch_factor then
+ (* *)
+ (* Create a new tail that is one longer.
+ Note: Array.init is no faster than this,
+ because it gets the zeroth element, fills the
+ array with that, then sets all the subsequent elements.
+ Do it manually here to avoid a closure allocation.
+ *)
+ let old_tail_length = Array.length t.tail in
+ let new_tail = Array.make (old_tail_length + 1) (Leaf value) in
+ Array.blit t.tail 0 new_tail 0 old_tail_length;
+ { t with tail = new_tail; count = old_count + 1 }
+ else
+ (* No more room in tail, insert the tail into the tree and make a new tail *)
+ let rec create_hierarchy height node =
+ if Int.equal 0 height then
+ node
+ else
+ let new_node = { array = Array.make branch_factor Empty } in
+ let child = create_hierarchy (height - bits_per_level) node in
+ new_node.array.(0) <- Inner child;
+ new_node
+ in
+ let rec push_tail height parent tail =
+ let sub_index = ((old_count - 1) lsr height) land inner_mask in
+ let result = { array = Array.copy parent.array } in
+ let to_insert =
+ if Int.equal height bits_per_level then
+ tail
+ else
+ match parent.array.(sub_index) with
+ | Inner child -> push_tail (height - bits_per_level) child tail
+ | Empty -> create_hierarchy (height - bits_per_level) tail
+ | Leaf _ -> failwith "Should not see leaves here"
+ in
+ result.array.(sub_index) <- Inner to_insert;
+ result
+ in
+ let node_to_insert = { array = t.tail } in
+ let (root, height) =
+ if (old_count lsr bits_per_level) <= (1 lsl t.height) then
+ (* Tree doesn't need to grow taller *)
+ let new_root = push_tail t.height t.root node_to_insert in
+ new_root, t.height
+ else
+ (* Tree needs to grow taller *)
+ let new_root = { array = Array.make branch_factor Empty } in
+ new_root.array.(0) <- Inner t.root;
+ new_root.array.(1) <- Inner (create_hierarchy t.height node_to_insert);
+ new_root, t.height + bits_per_level
+ in
+ {
+ root;
+ height;
+ count = old_count + 1;
+ tail = [|(Leaf value)|];
+ }
+
+(* Returns a new vector with index i associated to a. If i is equal to the current vector size, a is appended to the vector, expanding it.
+ If i is less than zero or greater than the current vector size, Invalid_argument is raised. *)
+let update (t : 'a t) (i : int) (value : 'a) : 'a t =
+ let count = length t in
+ if i < 0 || i > count then
+ invalid_arg "Out of bounds"
+ else if Int.equal i count then
+ (* TODO: reconsider this design choice of clojure? *)
+ append t value
+ else if i >= (tail_offset t) then
+ (* Replace in tail *)
+ let new_tail = Array.copy t.tail in
+ new_tail.(i land inner_mask) <- Leaf value;
+ { t with tail = new_tail }
+ else
+ (* Replace in tree by rebuilding the path down to the node *)
+ let rec rebuild node height =
+ let new_node = { array = Array.copy node.array } in
+ if Int.equal height 0 then
+ let () = new_node.array.(i land inner_mask) <- Leaf value in
+ new_node
+ else
+ let child_index = (i lsr height) land inner_mask in
+ let old_child = match node.array.(child_index) with
+ | Inner node -> node
+ | _ -> failwith "Invariant violation, only expecting Inner"
+ in
+ let child = rebuild old_child (height - bits_per_level) in
+ let () = new_node.array.(child_index) <- Inner child in
+ new_node
+ in
+ { t with root = rebuild t.root t.height }
+
+(** Dead-simple Sequence implementation.
+ TODO: optimize by holding onto the leaf so we don't have to walk down every time *)
+let to_seq (t : 'a t) : 'a Seq.t =
+ Seq.init t.count (fun i -> nth t i)
A => lib/vector.mli +20 -0
@@ 1,20 @@
+type 'a t
+
+(** Returns the length of the vector *)
+val length : 'a t -> int
+
+(** Returns the empty vector *)
+val empty : unit -> 'a t
+
+(** Gets the element at index, or None if out of bounds *)
+val nth_opt : 'a t -> int -> 'a option
+
+(** Gets the element at index, or raises Invalid_argument if out of bounds *)
+val nth : 'a t -> int -> 'a
+
+(** Appends an element to the vector *)
+val append : 'a t -> 'a -> 'a t
+
+val update : 'a t -> int -> 'a -> 'a t
+
+val to_seq : 'a t -> 'a Seq.t
A => persistent.opam +32 -0
@@ 1,32 @@
+# This file is generated by dune, edit dune-project instead
+opam-version: "2.0"
+synopsis:
+ "Clojure-style persistent data structures, sequences, and transducers for OCaml"
+description: "A longer description"
+maintainer: ["Vincent Lee"]
+authors: ["Vincent Lee"]
+license: "LICENSE"
+tags: ["topics" "to describe" "your" "project"]
+homepage: "https://github.com/username/reponame"
+doc: "https://url/to/documentation"
+bug-reports: "https://github.com/username/reponame/issues"
+depends: [
+ "ocaml"
+ "dune" {>= "3.7"}
+ "odoc" {with-doc}
+]
+build: [
+ ["dune" "subst"] {dev}
+ [
+ "dune"
+ "build"
+ "-p"
+ name
+ "-j"
+ jobs
+ "@install"
+ "@runtest" {with-test}
+ "@doc" {with-doc}
+ ]
+]
+dev-repo: "git+https://github.com/username/reponame.git"
A => test/dune +2 -0
@@ 1,2 @@
+(test
+ (name persistent))
A => test/persistent.ml +0 -0