Skip to content

Bucketed trees #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions src/functors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1306,3 +1306,127 @@ module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() = struct
let compare = Node.compare
let to_int = Node.to_int
end

module MakeBucketedHeterogeneous
(Map: BASE_MAP_INTERFACE)
(Buckets: sig
val nb_buckets : int
val bucket_id : 'a Map.key -> int
end) = struct
include Map
type 'map t = 'map Map.t array

let get = Array.unsafe_get
let set = Array.unsafe_set

let empty = Obj.magic (Array.make Buckets.nb_buckets Map.empty)
(* This Obj.magic is required to avoid the '_weak type.
It is correct as long as we never mutate empty without copying it first. *)
let empty : 'a t = empty

let is_empty arr = Array.for_all Map.is_empty arr
let cardinal arr = Array.fold_left (fun a b -> a + Map.cardinal b) 0 arr

let singleton key value =
let a = Array.copy empty in
set a (Buckets.bucket_id key) (Map.singleton key value);
a
let is_singleton map =
let exception NotSingleton in
try
Array.fold_left (fun prev map -> match prev, Map.is_singleton map with
| None, x -> x
| Some _, None -> prev
| Some _, Some _ -> raise NotSingleton
) None map
with NotSingleton -> None

let find key arr = Map.find key (get arr (Buckets.bucket_id key))
let find_opt key arr = Map.find_opt key (get arr (Buckets.bucket_id key))
let mem key arr = Map.mem key (get arr (Buckets.bucket_id key))

let update key fvalue arr =
let bucket = Buckets.bucket_id key in
let old_map = get arr bucket in
let new_map = Map.update key fvalue old_map in
if old_map == new_map
then arr
else
let arr = Array.copy arr in
set arr bucket new_map;
arr
let add key value arr = update key (fun _ -> Some value) arr
let insert key f arr = update key (fun x -> Some (f x)) arr
let remove key map = update key (fun _ -> None) map

let iter f arr = Array.iter (Map.iter f) arr
let map f arr = Array.map (Map.map f) arr
let mapi f arr = Array.map (Map.mapi f) arr
let map_no_share f arr = Array.map (Map.map_no_share f) arr
let mapi_no_share f arr = Array.map (Map.mapi_no_share f) arr
let filter f arr = Array.map (Map.filter f) arr
let filter_map f arr = Array.map (Map.filter_map f) arr
let filter_map_no_share f arr = Array.map (Map.filter_map_no_share f) arr
let fold f arr elt = Array.fold_left (fun elt map -> Map.fold f map elt) elt arr
let for_all f arr = Array.for_all (Map.for_all f) arr

let array_fold_left_2 f a1 a2 acc =
let rec aux i acc =
if i = Buckets.nb_buckets then acc
else aux (i+1) (f (get a1 i) (get a2 i) acc)
in aux 0 acc

let fold_on_nonequal_inter f arr1 arr2 elt = array_fold_left_2 (Map.fold_on_nonequal_inter f) arr1 arr2 elt
let fold_on_nonequal_union f arr1 arr2 elt = array_fold_left_2 (Map.fold_on_nonequal_union f) arr1 arr2 elt

let pretty ?(pp_sep=Format.pp_print_cut) pp fmt m =
(* Compact version of array, with empty elements removed
this avoids extra calls to pp_sep *)
let non_empty = Array.fold_right (fun elt lst ->
if Map.is_empty elt then lst else elt::lst) m []
in
Format.pp_print_list ~pp_sep (Map.pretty ~pp_sep pp) fmt non_empty

let reflexive_same_domain_for_all2 f a b = Array.for_all2 (Map.reflexive_same_domain_for_all2 f) a b
let reflexive_subset_domain_for_all2 f a b = Array.for_all2 (Map.reflexive_subset_domain_for_all2 f) a b
let nonreflexive_same_domain_for_all2 f a b = Array.for_all2 (Map.nonreflexive_same_domain_for_all2 f) a b
let disjoint a b = Array.for_all2 Map.disjoint a b

let idempotent_union f a b = Array.map2 (Map.idempotent_union f) a b
let idempotent_inter f a b = Array.map2 (Map.idempotent_inter f) a b
let nonidempotent_inter_no_share f a b = Array.map2 (Map.nonidempotent_inter_no_share f) a b
let idempotent_inter_filter f a b = Array.map2 (Map.idempotent_inter_filter f) a b
let slow_merge f a b = Array.map2 (Map.slow_merge f) a b
let difference f a b = Array.map2 (Map.difference f) a b
let symmetric_difference f a b = Array.map2 (Map.symmetric_difference f) a b
let split k m =
let s = Array.map (Map.split k) m in (
Array.map (fun (l, _, _) -> l) s,
Array.find_map (fun (_, e, _) -> e) s,
Array.map (fun (_, _, r) -> r) s
)
let to_seq m = Seq.flat_map Map.to_seq (Array.to_seq m)

let array_rev_seq a =
let rec aux i () =
if i >= 0
then Seq.Cons (get a i, aux (i-1))
else Seq.Nil
in aux (Buckets.nb_buckets - 1)
let to_rev_seq m = Seq.flat_map Map.to_rev_seq (array_rev_seq m)
let rec add_seq: type a. a key_value_pair Seq.t -> a t -> a t =
fun s m -> match s () with
| Seq.Nil -> m
| Seq.Cons(KeyValue(key,value), s) -> add_seq s (add key value m)
let of_seq s = add_seq s empty
let of_list l = of_seq (List.to_seq l)
let to_list m = List.of_seq (to_seq m)

let get_bucket arr i = get arr i
let set_bucket arr i m =
if m == get arr i then arr
else
let arr = Array.copy arr in
set arr i m;
arr
end
60 changes: 60 additions & 0 deletions src/functors.mli
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,63 @@ module MakeHashconsedHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEO

include HASH_CONSED_OPERATIONS with type 'a t := 'a t (** @inline *)
end

(** {1:bucketed Bucketed maps and sets} *)
(** Bucketed maps replace one large map with
a fixed number [nb_buckets] of smaller maps, using a
[bucket_id : key -> 0 .. (nb_buckets - 1)] function which
assigns to each key to a unique bucket.

This can offer performance improvements because the map operations are
performed on smaller maps. Especially if one bucket is small and frequently
accessed, and the others are much larger.

Bucketed maps are created on top of a regular maps, so they can be used with
any of the previous map or sets. Their interface is a bit simpler as they
aren't true Patricia trees anymore:
- They lack the {!NODE.val-view} functions and the constructors ({!NODE.branch}, {!NODE.leaf}...)
As such, they can't be used for cross map/set operations in the
{{!HETEROGENEOUS_MAP.WithForeign}[WithForeign]} functors.
- Iteration is no longer guaranteed to be in increasing order of {!KEY.to_int},
so the {{!BASE_MAP.unsigned_min_binding}[unsigned_min_binding]} and
{{!BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]} functions have been
removed. The other iteration functions are still here, but keep in mind
they no longer iterate in the order of {!KEY.to_int}. Instead, they
usually iterate on bucket order first, and then using {!KEY.to_int}
within a bucket. *)

(** Create a bucketed map on top of the given map/set

@since 0.11.0 *)
module MakeBucketedHeterogeneous
(Map: BASE_MAP_INTERFACE)
(Buckets: sig
val nb_buckets : int
(** The total number of buckets,
should be strictly positive and relatively small
(usually between 4 and 20). *)

val bucket_id : 'a Map.key -> int
(** The bucket number of the given key,
{b must be between [0] and [nb_buckets-1] inclusive}.
Not respecting this constraint will lead to runtime errors, as we use it as
an index for [Array.unsafe_get] and [Array.unsafe_set]. *)
end) : sig
include BASE_MAP_INTERFACE
with type 'key key = 'key Map.key
and type ('key, 'map) value = ('key, 'map) Map.value

val get_bucket : 'a t -> int -> 'a Map.t
(** [get_bucket a i] returns the [i]-th bucket, that is the submap of [a] containing
only the keys [k] which satisfy [bucket_id k = i]. This is equivalent to
{{!filter}[filter {f=fun k _ -> Buckets.bucket_id k = i}) m]}, but constant time instead
of [O(n)].

{b [i] must be between [0] and [nb_buckets-1] inclusive.} *)

val set_bucket : 'a t -> int -> 'a Map.t -> 'a t
(** [set_bucket a i m] changes the [i]-th bucket to [m].
{b [m] should only contain keys [k] which satisfy [bucket_id k = i]. }

{b [i] must be between [0] and [nb_buckets-1] inclusive.} *)
end
13 changes: 11 additions & 2 deletions src/index.mld
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,16 @@ See the {{!examples}examples} to jump right into using this library.
be extended to store size information in nodes if needed.}
{li Exposes a common interface ({!type:PatriciaTree.NODE.view}) to allow users to write their own pattern
matching on the tree structure without depending on the {{!PatriciaTree.NODE}[NODE]} being used.}
{li Additionally, hashconsed versions of heterogeneous/homogeneous maps/sets are
{li Additionally, {{!PatriciaTree.section-hash_consed}hashconsed versions of heterogeneous/homogeneous maps/sets} are
available. These provide constant time equality and comparison, and ensure
maps/set with the same constants are always physically equal. It comes at the cost
of a constant overhead in memory usage (at worst, as hash-consing may allow
memory gains) and constant time overhead when calling constructors.}}
memory gains) and constant time overhead when calling constructors.}
{li Finally, we allow {{!PatriciaTree.section-bucketed}bucketing maps and sets}.
i.e. splitting a large map into [n] smaller ones using a [bucket_id : key -> 0 .. (n-1)]
function. This allows some speed-up, especially if you know you will be
repeatedly accessing a small bucket.
}}

{1 Quick overview}

Expand Down Expand Up @@ -137,6 +142,10 @@ The functors used to build maps and sets are the following:
create a new hash-table to store the created nodes. Calling a functor
twice with same arguments will lead to two numbering systems for identifiers,
and thus the types should not be considered compatible.
}
{li Bucketed maps and sets are built on top of any of the previous maps
using the {{!PatriciaTree.MakeBucketedHeterogeneous}MakeBucketedHeterogeneous}
functor
}}

{2 Interfaces}
Expand Down
Loading
Loading