From 1a241c953c3357830df79d0372178827d60bc25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 13 Apr 2018 19:43:02 +0200 Subject: [PATCH] fix issorted bug (#1391) --- src/abstractdataframe/sort.jl | 17 +++++++----- test/sort.jl | 50 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/abstractdataframe/sort.jl b/src/abstractdataframe/sort.jl index 22bd23e4c6..20197ec2f2 100644 --- a/src/abstractdataframe/sort.jl +++ b/src/abstractdataframe/sort.jl @@ -73,14 +73,11 @@ DFPerm(o::O, df::DF) where {O<:Ordering, DF<:AbstractDataFrame} = DFPerm{O,DF}(o col_ordering(o::DFPerm{O}, i::Int) where {O<:Ordering} = o.ord col_ordering(o::DFPerm{V}, i::Int) where {V<:AbstractVector} = o.ord[i] -Base.@propagate_inbounds Base.getindex(o::DFPerm, i::Int, j::Int) = o.df[i, j] -Base.@propagate_inbounds Base.getindex(o::DFPerm, a::DataFrameRow, j::Int) = a[j] - function Sort.lt(o::DFPerm, a, b) - @inbounds for i = 1:ncol(o.df) + @inbounds for i in 1:ncol(o.df) ord = col_ordering(o, i) - va = o[a, i] - vb = o[b, i] + va = o.df[a, i] + vb = o.df[b, i] lt(ord, va, vb) && return true lt(ord, vb, va) && return false end @@ -272,7 +269,13 @@ function Base.issorted(df::AbstractDataFrame, cols_new=[]; cols=[], :issorted) cols_new = cols end - issorted(eachrow(df), ordering(df, cols_new, lt, by, rev, order)) + if cols_new isa ColumnIndex + issorted(df[cols_new], lt=lt, by=by, rev=rev, order=order) + elseif length(cols_new) == 1 + issorted(df[cols_new[1]], lt=lt, by=by, rev=rev, order=order) + else + issorted(1:nrow(df), ordering(df, cols_new, lt, by, rev, order)) + end end # sort and sortperm functions diff --git a/test/sort.jl b/test/sort.jl index e1bb06cc57..2aa1d50e28 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -42,6 +42,17 @@ module TestSort @test df == ds + df = DataFrame(x = [3, 1, 2, 1], y = ["b", "c", "a", "b"]) + @test !issorted(df, :x) + @test issorted(sort(df, :x), :x) + + x = DataFrame(a=1:3,b=3:-1:1,c=3:-1:1) + @test issorted(x) + @test !issorted(x, [:b,:c]) + @test !issorted(x[2:3], [:b,:c]) + @test issorted(sort(x,[2,3]), [:b,:c]) + @test issorted(sort(x[2:3]), [:b,:c]) + # Check that columns that shares the same underlying array are only permuted once PR#1072 df = DataFrame(a=[2,1]) df[:b] = df[:a] @@ -52,4 +63,43 @@ module TestSort sort!(x, :y) @test x[:y] == [1,2,3,4] @test x[:x] == [1,3,2,4] + + @test_throws ArgumentError sort(x, by=:x) + + srand(1) + # here there will be probably no ties + df_rand1 = DataFrame(rand(100, 4)) + # but here we know we will have ties + df_rand2 = df_rand1[:] + df_rand2[:x1] = shuffle([fill(1, 50); fill(2, 50)]) + df_rand2[:x4] = shuffle([fill(1, 50); fill(2, 50)]) + + # test sorting by 1 column + for df_rand in [df_rand1, df_rand2] + # testing sort + for n1 in names(df_rand) + # passing column name + @test sort(df_rand, n1) == df_rand[sortperm(df_rand[n1]),:] + # passing vector with one column name + @test sort(df_rand, [n1]) == df_rand[sortperm(df_rand[n1]),:] + # passing vector with two column names + for n2 in setdiff(names(df_rand), [n1]) + @test sort(df_rand, [n1,n2]) == df_rand[sortperm(collect(zip(df_rand[n1], + df_rand[n2]))),:] + end + end + # testing if sort! is consistent with issorted and sort + ref_df = df_rand[:] + for n1 in names(df_rand) + @test sort!(df_rand, n1) == sort(ref_df, n1) + @test issorted(df_rand, n1) + @test sort!(df_rand, [n1]) == sort(ref_df, [n1]) + @test issorted(df_rand, [n1]) + for n2 in setdiff(names(df_rand), [n1]) + @test sort!(df_rand, [n1, n2]) == sort(ref_df, [n1, n2]) + @test issorted(df_rand, n1) + @test issorted(df_rand, [n1, n2]) + end + end + end end