Skip to content

Commit

Permalink
Don't pool small inline string columns by default
Browse files Browse the repository at this point in the history
Fixes #982. Since the sizes of `String1` and `String3` are <= the size of
the ref integer type we use for pooling (`UInt32`), let's avoid pooling
them by default. Users can still request that specific columns be pooled,
as always.
  • Loading branch information
quinnj authored and nickrobinson251 committed Oct 7, 2022
1 parent 3d47734 commit 33d1d85
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ tupcat(::Type{Tuple{T, T2, T3}}, S) where {T, T2, T3} = Tuple{T, T2, T3, S}
# Append `S` as a fifth parameter to a four-parameter tuple type.
function tupcat(::Type{Tuple{A, B, C, D}}, S) where {A, B, C, D}
    return Tuple{A, B, C, D, S}
end

# Generic fallback for any other tuple type: gather the existing field types one by one,
# add `S` at the end, and build the resulting tuple type from the collected parameters.
function tupcat(::Type{T}, S) where {T <: Tuple}
    params = Any[fieldtype(T, k) for k in 1:fieldcount(T)]
    push!(params, S)
    return Tuple{params...}
end

# NOTE(review): the page reports "1 addition & 1 deletion" for this file and renders the
# diff without +/- markers, so the two definitions below are presumably the removed line
# followed by its replacement — confirm against the raw diff before copying either one.
# This form matches `Type{<:InlineString}`, i.e. every `InlineString` subtype.
const StringTypes = Union{Type{String}, Type{PosLenString}, Type{<:InlineString}}
# This form enumerates the inline string types explicitly and leaves out `String1` and
# `String3`, the two types the commit message says should no longer be pooled by default.
const StringTypes = Union{Type{String}, Type{PosLenString}, Type{InlineString}, Type{String7}, Type{String15}, Type{String31}, Type{String63}, Type{String127}, Type{String255}}
# Choose the concrete string type for a column.
# Any `T` other than `InlineString` itself is returned unchanged. For `InlineString`, the
# result is `InlineStringType(maxstringsize)` while `maxstringsize` stays below
# `DEFAULT_MAX_INLINE_STRING_LENGTH`, and plain `String` otherwise.
function pickstringtype(T, maxstringsize)
    T === InlineString || return T
    if maxstringsize < DEFAULT_MAX_INLINE_STRING_LENGTH
        return InlineStringType(maxstringsize)
    end
    return String
end

# we define our own bit flag on a Parsers.ReturnCode to signal if a column needs to promote to string
Expand Down
11 changes: 11 additions & 0 deletions test/basics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -819,4 +819,15 @@ f = CSV.File(IOBuffer(str); delim=" ", header=false, types=(i,nm) -> (i == 5 ? I
f = CSV.File(IOBuffer(str); delim=" ", header=false, types=Dict(r".*" => Float16))
@test Float16 <: eltype(f.Column5)

# 982: columns a, b and c hold values of 1 to 3 characters and are expected not to be
# pooled by default; the 4-character column d is still expected to be pooled
data = """a,b,c,d
A,BB,CCC,DDDD
A,BB,CCC,DDDD
"""
f = CSV.File(IOBuffer(data))
for smallcol in (:a, :b, :c)
    @test !(getproperty(f, smallcol) isa PooledArray)
end
@test f.d isa PooledArray

end
22 changes: 11 additions & 11 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,27 @@ include("write.jl")

# Checks the column type, contents and pool refs obtained when pooling is requested
# explicitly through the `pool` keyword.
# NOTE(review): this listing comes from a rendered diff without +/- markers (the header
# reports "11 additions & 11 deletions"), so each changed statement appears twice in a
# row. Presumably the single-character / `InlineString1` variant is the removed line and
# the four-character / `String7` variant is its replacement — confirm against the raw diff.
@testset "PooledArrays" begin

# pool=true: rows 2 and 4 hold the same value, so their refs must be equal
f = CSV.File(IOBuffer("X\nb\nc\na\nc"), pool=true)
@test typeof(f.X) == PooledArrays.PooledArray{InlineString1,UInt32,1,Array{UInt32,1}}
f = CSV.File(IOBuffer("X\nbbbb\ncccc\naaaa\ncccc"), pool=true)
@test typeof(f.X) == PooledArrays.PooledArray{String7,UInt32,1,Array{UInt32,1}}
@test (length(f), length(f.names)) == (4, 1)
@test f.X == ["b", "c", "a", "c"]
@test f.X == ["bbbb", "cccc", "aaaa", "cccc"]
@test f.X.refs[2] == f.X.refs[4]

# pool=0.75: same expectations as with pool=true
f = CSV.File(IOBuffer("X\nb\nc\na\nc"), pool=0.75)
@test typeof(f.X) == PooledArrays.PooledArray{InlineString1,UInt32,1,Array{UInt32,1}}
f = CSV.File(IOBuffer("X\nbbbb\ncccc\naaaa\ncccc"), pool=0.75)
@test typeof(f.X) == PooledArrays.PooledArray{String7,UInt32,1,Array{UInt32,1}}
@test (length(f), length(f.names)) == (4, 1)
@test f.X == ["b", "c", "a", "c"]
@test f.X == ["bbbb", "cccc", "aaaa", "cccc"]
@test f.X.refs[2] == f.X.refs[4]

# the empty third row is kept (ignoreemptyrows=false) and is expected to read as missing
f = CSV.File(IOBuffer("X\nb\nc\n\nc"), pool=true, ignoreemptyrows=false)
@test typeof(f.X) == PooledArray{Union{Missing, InlineString1},UInt32,1,Array{UInt32,1}}
f = CSV.File(IOBuffer("X\nbbbb\ncccc\n\ncccc"), pool=true, ignoreemptyrows=false)
@test typeof(f.X) == PooledArray{Union{Missing, String7},UInt32,1,Array{UInt32,1}}
@test (length(f), length(f.names)) == (4, 1)
@test f.X[3] === missing

# pool=0.25 with a single repeated value plus one empty row: 9 rows, still pooled
f = CSV.File(IOBuffer("X\nc\nc\n\nc\nc\nc\nc\nc\nc"), pool=0.25, ignoreemptyrows=false)
@test typeof(f.X) == PooledArray{Union{Missing, InlineString1},UInt32,1,Array{UInt32,1}}
f = CSV.File(IOBuffer("X\ncccc\ncccc\n\ncccc\ncccc\ncccc\ncccc\ncccc\ncccc"), pool=0.25, ignoreemptyrows=false)
@test typeof(f.X) == PooledArray{Union{Missing, String7},UInt32,1,Array{UInt32,1}}
@test (length(f), length(f.names)) == (9, 1)
@test isequal(f.X, ["c", "c", missing, "c", "c", "c", "c", "c", "c"])
@test isequal(f.X, ["cccc", "cccc", missing, "cccc", "cccc", "cccc", "cccc", "cccc", "cccc"])

end

Expand Down

0 comments on commit 33d1d85

Please sign in to comment.