Skip to content

Commit

Permalink
fix issorted bug (#1391)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored and nalimilan committed Apr 13, 2018
1 parent be4ec00 commit 1a241c9
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 7 deletions.
17 changes: 10 additions & 7 deletions src/abstractdataframe/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,11 @@ DFPerm(o::O, df::DF) where {O<:Ordering, DF<:AbstractDataFrame} = DFPerm{O,DF}(o
col_ordering(o::DFPerm{O}, i::Int) where {O<:Ordering} = o.ord
col_ordering(o::DFPerm{V}, i::Int) where {V<:AbstractVector} = o.ord[i]

Base.@propagate_inbounds Base.getindex(o::DFPerm, i::Int, j::Int) = o.df[i, j]
Base.@propagate_inbounds Base.getindex(o::DFPerm, a::DataFrameRow, j::Int) = a[j]

function Sort.lt(o::DFPerm, a, b)
@inbounds for i = 1:ncol(o.df)
@inbounds for i in 1:ncol(o.df)
ord = col_ordering(o, i)
va = o[a, i]
vb = o[b, i]
va = o.df[a, i]
vb = o.df[b, i]
lt(ord, va, vb) && return true
lt(ord, vb, va) && return false
end
Expand Down Expand Up @@ -272,7 +269,13 @@ function Base.issorted(df::AbstractDataFrame, cols_new=[]; cols=[],
:issorted)
cols_new = cols
end
issorted(eachrow(df), ordering(df, cols_new, lt, by, rev, order))
if cols_new isa ColumnIndex
issorted(df[cols_new], lt=lt, by=by, rev=rev, order=order)
elseif length(cols_new) == 1
issorted(df[cols_new[1]], lt=lt, by=by, rev=rev, order=order)
else
issorted(1:nrow(df), ordering(df, cols_new, lt, by, rev, order))
end
end

# sort and sortperm functions
Expand Down
50 changes: 50 additions & 0 deletions test/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,17 @@ module TestSort

@test df == ds

df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"])
@test !issorted(df, :x)
@test issorted(sort(df, :x), :x)

x = DataFrame(a=1:3,b=3:-1:1,c=3:-1:1)
@test issorted(x)
@test !issorted(x, [:b,:c])
@test !issorted(x[2:3], [:b,:c])
@test issorted(sort(x,[2,3]), [:b,:c])
@test issorted(sort(x[2:3]), [:b,:c])

# Check that columns that shares the same underlying array are only permuted once PR#1072
df = DataFrame(a=[2,1])
df[:b] = df[:a]
Expand All @@ -52,4 +63,43 @@ module TestSort
sort!(x, :y)
@test x[:y] == [1,2,3,4]
@test x[:x] == [1,3,2,4]

@test_throws ArgumentError sort(x, by=:x)

srand(1)
# here there will be probably no ties
df_rand1 = DataFrame(rand(100, 4))
# but here we know we will have ties
df_rand2 = df_rand1[:]
df_rand2[:x1] = shuffle([fill(1, 50); fill(2, 50)])
df_rand2[:x4] = shuffle([fill(1, 50); fill(2, 50)])

# test sorting by 1 column
for df_rand in [df_rand1, df_rand2]
# testing sort
for n1 in names(df_rand)
# passing column name
@test sort(df_rand, n1) == df_rand[sortperm(df_rand[n1]),:]
# passing vector with one column name
@test sort(df_rand, [n1]) == df_rand[sortperm(df_rand[n1]),:]
# passing vector with two column names
for n2 in setdiff(names(df_rand), [n1])
@test sort(df_rand, [n1,n2]) == df_rand[sortperm(collect(zip(df_rand[n1],
df_rand[n2]))),:]
end
end
# testing if sort! is consistent with issorted and sort
ref_df = df_rand[:]
for n1 in names(df_rand)
@test sort!(df_rand, n1) == sort(ref_df, n1)
@test issorted(df_rand, n1)
@test sort!(df_rand, [n1]) == sort(ref_df, [n1])
@test issorted(df_rand, [n1])
for n2 in setdiff(names(df_rand), [n1])
@test sort!(df_rand, [n1, n2]) == sort(ref_df, [n1, n2])
@test issorted(df_rand, n1)
@test issorted(df_rand, [n1, n2])
end
end
end
end

0 comments on commit 1a241c9

Please sign in to comment.