Skip to content

Commit

Permalink
Bugfix ChainedVector constructor performance
Browse files Browse the repository at this point in the history
Currently the ChainedVector constructor has bad performance if the args have empty entries.

```
import Random
struct MyType
    x::Float64
    y::Float64
end

x1 = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(1:6)] for _ in 1:1_000_000];
@time ChainedVector(x1);  # 0.017 secs

x2 = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(0:5)] for _ in 1:1_000_000];
@time ChainedVector(x2);  # 24 secs
```

Note that the difference comes only from the fact that x2 might have empty entries
whereas x1 doesn't.

The problem is that the function cleanup! has runtime proportional to the number of
elements times the number of elements which are empty vectors.

```
function cleanup_original!(arrays, inds)
    @Assert length(arrays) == length(inds)
    for i = length(arrays):-1:1
        if !isassigned(arrays, i) || length(arrays[i]) == 0
            deleteat!(arrays, i)
            deleteat!(inds, i)
        end
    end
    return
end

function cleanup_new!(arrays, inds)
    @Assert length(arrays) == length(inds)
    mask_it = Base.Iterators.filter(i->isassigned(arrays, i) && length(arrays[i])>0, 1:length(arrays))
    n = 0
    for k in mask_it
        n += 1
        k > n || continue
        arrays[n] = arrays[k]
        inds[n] = inds[k]
    end
    resize!(arrays, n)
    resize!(inds, n)
    return
end
```

We can check that the new version is faster and returns the same result

```
x = [ MyType[MyType(rand(), rand()) for _ in 1:Random.rand(0:5)] for _ in 1:1_000_000];
arrays = x;
inds = Vector{Int}(undef, length(arrays));

x1 = copy(x);
inds1 = copy(inds);
x2 = copy(x);
inds2 = copy(inds);

@time cleanup_original!(x1, inds1)  # 20 secs
@time cleanup_new!(x2, inds2)  # 0.04 secs

all(x1 .== x2)  # true
all(inds1 .== inds2)  # true
```
  • Loading branch information
JoaoAparicio authored and quinnj committed Jul 7, 2024
1 parent 008e818 commit ae1085f
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions src/chainedvector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,16 @@ end

@inline function cleanup!(arrays, inds)
@assert length(arrays) == length(inds)
for i = length(arrays):-1:1
if !isassigned(arrays, i) || length(arrays[i]) == 0
deleteat!(arrays, i)
deleteat!(inds, i)
end
end
mask_it = Base.Iterators.filter(i->isassigned(arrays, i) && length(arrays[i])>0, 1:length(arrays))
n = 0
for k in mask_it
n += 1
k > n || continue
arrays[n] = arrays[k]
inds[n] = inds[k]
end
resize!(arrays, n)
resize!(inds, n)
return
end

Expand Down

0 comments on commit ae1085f

Please sign in to comment.