Skip to content

Commit 2b0d469

Browse files
committed
update BinarySearch, Shuffle, AdjacencyGraph, and others
1 parent a52fc47 commit 2b0d469

File tree

11 files changed

+501
-72
lines changed

11 files changed

+501
-72
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@ Sequences
3636
* structure [`DelayedSeq`](doc/DelayedSeq.md)
3737
* structure [`SeqBasis`](doc/SeqBasis.md)
3838

39-
Sorting
39+
Sorting and Permutations
4040
* structure [`Merge`](doc/Merge.md)
4141
* structure [`StableMerge`](doc/StableMerge.md)
4242
* structure [`StableSort`](doc/StableSort.md)
4343
* structure [`Mergesort`](doc/Mergesort.md)
4444
* structure [`SampleSort`](doc/SampleSort.md)
4545
* structure [`CountingSort`](doc/CountingSort.md)
4646
* structure [`Quicksort`](doc/Quicksort.md)
47+
* structure [`Shuffle`](doc/Shuffle.md)
4748

4849
Searching
4950
* structure [`BinarySearch`](doc/BinarySearch.md)

doc/BinarySearch.md

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,64 @@ according to `cmp`, then this will find the "leftmost" one.
2626
**Requires** the input sequence must be sorted w.r.t. `cmp`.
2727

2828
Logarithmic work and span.
29+
30+
31+
```sml
32+
val searchPosition: 'a Seq.t -> ('a -> order) -> int
33+
```
34+
35+
`searchPosition s cmpTargetAgainst` finds a target position in the sequence
36+
by using `cmpTargetAgainst` to point towards the target position. This is
37+
useful when you aren't looking for a specific element, but some location
38+
within a sequence. Note that this is more general than the plain `search`
39+
function, because we can implement `search` in terms of
40+
`searchPosition` as follows:
41+
`fun search cmp s x = searchPosition s (fn y => cmp (x, y))`.
42+
43+
**Requires** the input sequence must be sorted w.r.t. `cmpTargetAgainst`.
44+
45+
Logarithmic work and span.
46+
47+
48+
## Examples
49+
50+
Suppose `table: (key * value) seq` represents a mapping from keys to values,
51+
and it is sorted by key. Here we use `searchPosition` to look up the value
52+
associated with a particular key `target`:
53+
```sml
54+
fun lookup
55+
{ table: (key * value) Seq.t
56+
, keyCmp: key * key -> order
57+
, target: key
58+
}
59+
: value option
60+
=
61+
let
62+
val n = Seq.length table
63+
64+
(** result of this call is an idx such that table[idx] contains the
65+
* target key, if the table contains the target key.
66+
*)
67+
val idx = BinarySearch.searchPosition table (fn k => keyCmp (target, k))
68+
in
69+
(** now we need to check if the table actually contains the key *)
70+
if idx = n then
71+
(** In this case, the target position is at the end of the sequence,
72+
* i.e., it is larger than any key in the table. So this key is
73+
* NOT in the table.
74+
*)
75+
NONE
76+
else
77+
(** In this case, the target position is somewhere in the middle of
78+
* the sequence. It may or may not be in the table though; we need to
79+
* inspect the key that is at table[idx]
80+
*)
81+
let
82+
val (k, v) = Seq.nth table idx
83+
in
84+
case keyCmp (target, k) of
85+
EQUAL => SOME v
86+
| _ => NONE
87+
end
88+
end
89+
```

doc/Shuffle.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# structure Shuffle
2+
3+
```sml
4+
val shuffle: 'a Seq.t -> int -> 'a Seq.t
5+
```
6+
7+
`shuffle s seed` produces a pseudorandom permutation of `s` based on the
8+
random seed `seed`.
9+
10+
For a particular seed, it will always produce
11+
the same result. Any two shuffles (using two different seeds) are independent.
12+
E.g. `shuffle s seed` is independent of `shuffle s (seed+1)`.
13+
14+
Linear work and polylogarithmic span.

lib/github.com/mpllang/mpllib/AdjacencyGraph.sml

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,80 @@ struct
1111
val maxVal = toInt (valOf maxInt)
1212
end
1313

14+
structure VertexSubset =
15+
struct
16+
datatype h = SPARSE of Vertex.t Seq.t | DENSE of int Seq.t
17+
type t = h * int
18+
exception BadRep
19+
20+
fun empty thresh = (SPARSE (Seq.empty()), thresh)
21+
22+
fun size (vs, thresh) =
23+
case vs of
24+
SPARSE s => Seq.length s
25+
| DENSE s => Seq.reduce op+ 0 s
26+
27+
fun plugOnes s positions =
28+
(Seq.foreach positions (fn (i, v) => AS.update (s, Vertex.toInt v, 1)))
29+
30+
fun append (vs, threshold) s n =
31+
case vs of
32+
SPARSE ss =>
33+
if (Seq.length ss) + (Seq.length s) > threshold then
34+
let
35+
val dense_rep = Seq.tabulate (fn x => 0) n
36+
val _ = plugOnes dense_rep ss
37+
val _ = plugOnes dense_rep s
38+
in
39+
(DENSE (dense_rep), threshold)
40+
end
41+
else (SPARSE(Seq.append (ss, s)), threshold)
42+
| DENSE ss => (plugOnes ss s; (DENSE ss, threshold))
43+
44+
fun sparse_to_dense vs n =
45+
case vs of
46+
SPARSE s =>
47+
let
48+
val dense_rep = Seq.tabulate (fn x => 0) n
49+
val _ = Seq.foreach s (fn (i, v) => AS.update (dense_rep, Vertex.toInt v, 1))
50+
in
51+
DENSE (dense_rep)
52+
end
53+
| DENSE _ => raise BadRep
54+
55+
fun dense_to_sparse vs =
56+
case vs of
57+
SPARSE _ => raise BadRep
58+
| DENSE s =>
59+
let
60+
val (offsets, total) = Seq.scan op+ 0 s
61+
val sparse = ForkJoin.alloc total
62+
val _ = Seq.foreach s (fn (i, v) =>
63+
if (v=1) then A.update (sparse, Seq.nth offsets i, Vertex.fromInt i)
64+
else if (v = 0) then ()
65+
else raise BadRep
66+
)
67+
in
68+
SPARSE (AS.full sparse)
69+
end
70+
71+
fun from_sparse_rep s threshold n =
72+
if (Seq.length s) < threshold then (SPARSE (s), threshold)
73+
else (sparse_to_dense (SPARSE (s)) n, threshold)
74+
75+
fun from_dense_rep s countopt threshold =
76+
let
77+
val count =
78+
case countopt of
79+
SOME x => x
80+
| NONE => Seq.reduce op+ 0 s
81+
val d = DENSE(s)
82+
in
83+
if count < threshold then (dense_to_sparse(d), threshold)
84+
else (d, threshold)
85+
end
86+
end
87+
1488
type vertex = Vertex.t
1589
fun vertexNth s v = Seq.nth s (Vertex.toInt v)
1690
fun vToWord v = Word64.fromInt (Vertex.toInt v)

lib/github.com/mpllang/mpllib/AdjacencyInt.sml

Lines changed: 104 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -4,114 +4,149 @@ struct
44

55
structure G = AdjacencyGraph(Int)
66
structure AS = ArraySlice
7-
structure DS = DelayedSeq
7+
open G.VertexSubset
88

9-
10-
(* fun sumOfOutDegrees frontier =
11-
SeqBasis.reduce 10000 op+ 0 (0, Seq.length frontier) (degree o Seq.nth frontier)
12-
(* DS.reduce op+ 0 (DS.map degree (DS.fromArraySeq frontier)) *)
13-
14-
fun shouldProcessDense frontier =
15-
let
16-
val n = Seq.length frontier
17-
val m = sumOfOutDegrees frontier
18-
in
19-
n + m > denseThreshold
20-
end *)
21-
22-
fun should_process_sparse g V =
9+
fun should_process_sparse g n =
2310
let
2411
val denseThreshold = G.numEdges g div 20
25-
val totalOutDegree =
26-
SeqBasis.reduce 10000 op+ 0 (0, Seq.length V) (G.degree g o Seq.nth V)
27-
val n = Seq.length V
28-
val m = totalOutDegree
12+
val deg = Int.div (G.numEdges g, G.numVertices g)
13+
val count = (1 + deg) * n
2914
in
30-
n + m <= denseThreshold
15+
count <= denseThreshold
3116
end
3217

33-
3418
fun edge_map_dense g vertices f h =
3519
let
36-
val inFrontier = Seq.tabulate (fn _ => false) (G.numVertices g)
37-
val _ = Seq.foreach vertices (fn (_, v) =>
38-
ArraySlice.update (inFrontier, v, true))
20+
val inFrontier = vertices
21+
val n = Seq.length vertices
22+
val res = Seq.tabulate (fn _ => 0) n
3923

4024
fun processVertex v =
41-
if not (h v) then NONE
25+
if not (h v) then 0
4226
else
4327
let
4428
val neighbors = G.neighbors g v
4529
fun loop i =
46-
if i >= Seq.length neighbors then NONE else
30+
if i >= Seq.length neighbors then 0 else
4731
let val u = Seq.nth neighbors i
4832
in
49-
if not (Seq.nth inFrontier u) then
33+
if not (Seq.nth inFrontier u = 1) then
5034
loop (i+1)
5135
else
52-
case f (u, v) of
53-
NONE => loop (i+1)
54-
| SOME x => SOME x
36+
case f (u, v) of
37+
NONE => loop (i+1)
38+
| SOME x => (AS.update (res, x, 1); 1)
5539
end
5640
in
5741
loop 0
5842
end
43+
val count = SeqBasis.reduce 1000 op+ 0 (0, n) processVertex
5944
in
60-
AS.full (SeqBasis.tabFilter 100 (0, G.numVertices g) processVertex)
45+
(res, count)
6146
end
6247

63-
6448
fun edge_map_sparse g vertices f h =
6549
let
66-
fun app_vertex u =
50+
val n = Seq.length vertices
51+
fun ui uidx = Seq.nth vertices uidx
52+
val r =
53+
SeqBasis.scan 1000 op+ 0 (0, n) (G.degree g o ui)
54+
val (offsets, totalOutDegree) = (AS.full r, Array.sub (r, n))
55+
val store = ForkJoin.alloc totalOutDegree
56+
val k = 100
57+
val numBlocks = 1 + (totalOutDegree-1) div k
58+
fun map_block i =
6759
let
68-
val all_ngbrs = (G.neighbors g u)
69-
fun ds i = let
70-
val v = Seq.nth all_ngbrs i
71-
in
72-
if h (v) then f (u, v)
73-
else NONE
74-
end
75-
val m = SeqBasis.tabFilter 10000 (0, Seq.length all_ngbrs) ds
60+
val lo = i*k
61+
val hi = Int.min((i+1)*k, totalOutDegree)
62+
val ulo =
63+
let
64+
val a = BinarySearch.search (Int.compare) offsets lo
65+
in
66+
if (Seq.nth offsets a) > lo then a - 1
67+
else a
68+
end
69+
fun map_seq idx (u, uidx) count =
70+
if idx >= hi then count
71+
else if idx >= (Seq.nth offsets (uidx + 1)) then map_seq idx (ui (uidx + 1), uidx + 1) count
72+
else
73+
let
74+
val v = Seq.nth (G.neighbors g u) (idx - (Seq.nth offsets uidx))
75+
in
76+
if (h v) then
77+
case f (u, v) of
78+
SOME x => (Array.update (store, lo + count, x); map_seq (idx + 1) (u, uidx) (count + 1))
79+
| NONE => (map_seq (idx + 1) (u, uidx) count)
80+
else
81+
(map_seq (idx + 1) (u, uidx) count)
82+
end
7683
in
77-
DS.fromArraySeq (AS.full m)
84+
map_seq lo (ui ulo, ulo) 0
7885
end
86+
val counts = SeqBasis.tabulate 1 (0, numBlocks) map_block
87+
val outOff = SeqBasis.scan 10000 op+ 0 (0, numBlocks) (fn i => Array.sub (counts, i))
88+
val outSize = Array.sub (outOff, numBlocks)
89+
val result = ForkJoin.alloc outSize
7990
in
80-
DS.toArraySeq (DS.flatten (DS.map app_vertex (DS.fromArraySeq vertices)))
91+
ForkJoin.parfor (totalOutDegree div (Int.max (outSize, 1))) (0, numBlocks) (fn i =>
92+
let
93+
val soff = i * k
94+
val doff = Array.sub (outOff, i)
95+
val size = Array.sub (outOff, i+1) - doff
96+
in
97+
Util.for (0, size) (fn j =>
98+
Array.update (result, doff+j, Array.sub (store, soff+j)))
99+
end);
100+
(AS.full result)
81101
end
82102

83-
fun edge_map g V f h =
84-
if should_process_sparse g V then
85-
edge_map_sparse g V f h
86-
else
87-
edge_map_dense g V f h
103+
fun edge_map g (vs, threshold) (fpar, f) h =
104+
case vs of
105+
SPARSE s =>
106+
from_sparse_rep (edge_map_sparse g s fpar h) threshold (G.numVertices g)
107+
| DENSE s =>
108+
let
109+
val (res, count) = edge_map_dense g s f h
110+
in
111+
from_dense_rep res (SOME count) threshold
112+
end
88113

89-
fun contract clusters g =
90-
let
91-
val n = G.numVertices g
92-
val vertices = Seq.tabulate (fn u => u) n
93-
val has_neighbor = Seq.tabulate (fn i => 0) n
114+
fun vertex_foreach g (vs, threshold) f =
115+
case vs of
116+
SPARSE s =>
117+
Seq.foreach s (fn (i, u) => f u)
118+
| DENSE s =>
119+
Seq.foreach s (fn (i, b) => if (b = 1) then (f i) else ())
94120

95-
fun upd (u, v) =
121+
fun vertex_map_ g (vs, threshold) f =
122+
case vs of
123+
SPARSE s =>
96124
let
97-
val (cu, cv) = ((Seq.nth clusters u), (Seq.nth clusters v))
125+
val s' =
126+
AS.full (SeqBasis.tabFilter 1000 (0, Seq.length s)
127+
(fn i =>
128+
let
129+
val u = Seq.nth s i
130+
val b = f u
131+
in
132+
if b then SOME u
133+
else NONE
134+
end
135+
))
98136
in
99-
if cu = cv then NONE
100-
else (AS.update (has_neighbor, cu, 1); SOME (cu, cv))
137+
(from_sparse_rep s' threshold (G.numVertices g))
101138
end
102-
val sorted_edges = G.dedupEdges (edge_map g vertices upd (fn _ => true))
103-
val (vmap, num_taken) = Seq.scan Int.+ 0 has_neighbor
104-
val new_sorted_edges = Seq.map (fn (x, y) => (Seq.nth vmap x, Seq.nth vmap y)) sorted_edges
105-
106-
fun new_label c =
139+
| DENSE s =>
107140
let
108-
val is_taken = (Seq.nth has_neighbor c) = 1
109-
val num_taken_left = Seq.nth vmap c
141+
val res =
142+
Seq.map (fn i => if (Seq.nth s i = 1) andalso f i then 1 else 0) s
110143
in
111-
if is_taken then num_taken_left
112-
else num_taken + (c - num_taken_left)
144+
from_dense_rep res NONE threshold
113145
end
114-
in
115-
(G.fromSortedEdges new_sorted_edges, new_label)
116-
end
146+
147+
148+
fun vertex_map g vs f needOut =
149+
if needOut then vertex_map_ g vs f
150+
else (vertex_foreach g vs; vs)
151+
117152
end

0 commit comments

Comments
 (0)