From 0557c0e5dc140e7bbf48e3b2ca8c8fdd8bfda031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 19 Apr 2018 10:11:58 +0200 Subject: [PATCH] Always collect AbstractRange (#1392) --- src/dataframe/dataframe.jl | 16 +++++++++++----- test/dataframe.jl | 7 +++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 454e82ff54..47c277e572 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -40,12 +40,17 @@ Each column in `columns` should be the same length. **Notes** A `DataFrame` is a lightweight object. As long as columns are not -manipulated, creation of a DataFrame from existing AbstractVectors is +manipulated, creation of a `DataFrame` from existing AbstractVectors is inexpensive. For example, indexing on columns is inexpensive, but indexing by rows is expensive because copies are made of each column. -Because column types can vary, a DataFrame is not type stable. For -performance-critical code, do not index into a DataFrame inside of +If a column is passed to a `DataFrame` constructor or is assigned as a whole +using `setindex!` then its reference is stored in the `DataFrame`. An exception +to this rule is assignment of an `AbstractRange` as a column, in which case the +range is collected to a `Vector`. + +Because column types can vary, a `DataFrame` is not type stable. For +performance-critical code, do not index into a `DataFrame` inside of loops. **Examples** @@ -325,12 +330,13 @@ end # Will automatically add a new column if needed function insert_single_column!(df::DataFrame, - dv::AbstractVector, + v::AbstractVector, col_ind::ColumnIndex) - if ncol(df) != 0 && nrow(df) != length(dv) + if ncol(df) != 0 && nrow(df) != length(v) throw(ArgumentError("New columns must have the same length as old columns")) end + dv = isa(v, AbstractRange) ? collect(v) : v if haskey(index(df), col_ind) j = index(df)[col_ind] df.columns[j] = dv diff --git a/test/dataframe.jl b/test/dataframe.jl index 9e2b9e2f43..58483c656c 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -635,6 +635,13 @@ module TestDataFrame @test names(df) == [:x3, :x3_1, :x3_2, :x4] end + @testset "passing range to a DataFrame" begin + df = DataFrame(a=1:3, b='a':'c') + df[:c] = 1:3 + df[:d] = 'a':'c' + @test all(typeof(df[i]) <: Vector for i in 1:ncol(df)) + end + @testset "handling of end in indexing" begin z = DataFrame(rand(4,5)) for x in [z, view(z, 1:4)]