diff --git a/docs/make.jl b/docs/make.jl index d5b2f51..b1ca253 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,14 +1,14 @@ using Documenter, CSVFiles makedocs( - modules = [CSVFiles], - sitename = "CSVFiles.jl", - analytics="UA-132838790-1", - pages = [ + modules=[CSVFiles], + sitename="CSVFiles.jl", + analytics="UA-132838790-1", + pages=[ "Introduction" => "index.md" ] ) deploydocs( - repo = "github.com/queryverse/CSVFiles.jl.git" + repo="github.com/queryverse/CSVFiles.jl.git" ) diff --git a/src/CSVFiles.jl b/src/CSVFiles.jl index 7be4e06..850ad36 100644 --- a/src/CSVFiles.jl +++ b/src/CSVFiles.jl @@ -50,36 +50,36 @@ end Base.showable(::MIME"application/vnd.dataresource+json", source::CSVStream) = true -function fileio_load(f::FileIO.File{FileIO.format"CSV"}, deprecated_delim=nothing; delim=deprecated_delim===nothing ? ',' : deprecated_delim, args...) - if deprecated_delim!==nothing - deprecated_delim!=delim && error("deprecated_delim and delim can not both be used at the same time.") +function fileio_load(f::FileIO.File{FileIO.format"CSV"}, deprecated_delim=nothing; delim=deprecated_delim === nothing ? ',' : deprecated_delim, args...) + if deprecated_delim !== nothing + deprecated_delim != delim && error("deprecated_delim and delim can not both be used at the same time.") Base.depwarn("The positional `delim` keyword in the `load` function is deprecated. Instead use the keyword argument `delim`.", :CSVFiles) end return CSVFile(f.filename, delim, args) end -function fileio_load(f::FileIO.File{FileIO.format"TSV"}, deprecated_delim=nothing; delim=deprecated_delim===nothing ? '\t' : deprecated_delim, args...) - if deprecated_delim!==nothing - deprecated_delim!=delim && error("deprecated_delim and delim can not both be used at the same time.") +function fileio_load(f::FileIO.File{FileIO.format"TSV"}, deprecated_delim=nothing; delim=deprecated_delim === nothing ? '\t' : deprecated_delim, args...) + if deprecated_delim !== nothing + deprecated_delim != delim && error("deprecated_delim and delim can not both be used at the same time.") Base.depwarn("The positional `delim` keyword in the `load` function is deprecated. Instead use the keyword argument `delim`.", :CSVFiles) end return CSVFile(f.filename, delim, args) end -function fileio_load(s::FileIO.Stream{FileIO.format"CSV"}, deprecated_delim=nothing; delim=deprecated_delim===nothing ? ',' : deprecated_delim, args...) - if deprecated_delim!==nothing - deprecated_delim!=delim && error("deprecated_delim and delim can not both be used at the same time.") +function fileio_load(s::FileIO.Stream{FileIO.format"CSV"}, deprecated_delim=nothing; delim=deprecated_delim === nothing ? ',' : deprecated_delim, args...) + if deprecated_delim !== nothing + deprecated_delim != delim && error("deprecated_delim and delim can not both be used at the same time.") Base.depwarn("The positional `delim` keyword in the `load` function is deprecated. Instead use the keyword argument `delim`.", :CSVFiles) end return CSVStream(s.io, delim, args) end -function fileio_load(s::FileIO.Stream{FileIO.format"TSV"}, deprecated_delim=nothing; delim=deprecated_delim===nothing ? '\t' : deprecated_delim, args...) - if deprecated_delim!==nothing - deprecated_delim!=delim && error("deprecated_delim and delim can not both be used at the same time.") +function fileio_load(s::FileIO.Stream{FileIO.format"TSV"}, deprecated_delim=nothing; delim=deprecated_delim === nothing ? '\t' : deprecated_delim, args...) + if deprecated_delim !== nothing + deprecated_delim != delim && error("deprecated_delim and delim can not both be used at the same time.") Base.depwarn("The positional `delim` keyword in the `load` function is deprecated. Instead use the keyword argument `delim`.", :CSVFiles) end @@ -114,7 +114,7 @@ end function TableTraits.get_columns_copy_using_missing(file::CSVFile) columns, colnames = _loaddata(file) - return NamedTuple{(Symbol.(colnames)...,), Tuple{typeof.(columns)...}}((columns...,)) + return NamedTuple{(Symbol.(colnames)...,),Tuple{typeof.(columns)...}}((columns...,)) end function IteratorInterfaceExtensions.getiterator(s::CSVStream) @@ -127,7 +127,7 @@ end function TableTraits.get_columns_copy_using_missing(s::CSVStream) columns, colnames = TextParse.csvread(s.io, s.delim; stringarraytype=Array, s.keywords...) - return NamedTuple{(Symbol.(colnames)...,), Tuple{typeof.(columns)...}}((columns...,)) + return NamedTuple{(Symbol.(colnames)...,),Tuple{typeof.(columns)...}}((columns...,)) end function Base.collect(x::CSVFile) diff --git a/src/csv_writer.jl b/src/csv_writer.jl index c439490..63458e6 100644 --- a/src/csv_writer.jl +++ b/src/csv_writer.jl @@ -5,7 +5,7 @@ end function _writevalue(io::IO, value::AbstractString, delim, quotechar, escapechar, nastring) print(io, quotechar) for c in value - if c==quotechar || c==escapechar + if c == quotechar || c == escapechar print(io, escapechar) end print(io, c) @@ -31,12 +31,12 @@ end n = length(col_names) push_exprs = Expr(:block) for i in 1:n - push!(push_exprs.args, :( _writevalue(io, i.$(col_names[i]), delim, quotechar, escapechar, nastring) )) - if i "$(escapechar)$(quotechar)") * "$(quotechar)" for colname in colnames],delim) + join(io, ["$(quotechar)" * replace(string(colname), quotechar => "$(escapechar)$(quotechar)") * "$(quotechar)" for colname in colnames], delim) end println(io) end @@ -69,11 +69,11 @@ function _save(filename::AbstractString, data; delim=',', quotechar='"', escapec if ext == "gz" # Gzipped open(GzipCompressorStream, filename, "w") do io - _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header) + _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header) end else open(filename, "w") do io - _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header) + _save(io, data, delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header) end end end @@ -99,7 +99,7 @@ end # # Streaming version writes header (if any) on first call, then appends on subsequent calls. # -const CSV_or_TSV = Union{FileIO.format"CSV", FileIO.format"TSV"} +const CSV_or_TSV = Union{FileIO.format"CSV",FileIO.format"TSV"} _delim(T) = T <: FileIO.format"CSV" ? ',' : '\t' @@ -112,26 +112,26 @@ mutable struct CSVFileSaveStream{T} nastring::AbstractString header::Bool end - -function fileio_savestreaming(f::FileIO.File{T}, data=nothing; delim=_delim(T), quotechar='"', escapechar='"', nastring="NA", - header=true) where T <: CSV_or_TSV + +function fileio_savestreaming(f::FileIO.File{T}, data=nothing; delim=_delim(T), quotechar='"', escapechar='"', nastring="NA", + header=true) where T<:CSV_or_TSV io = open(f.filename, "w") - if data!==nothing + if data !== nothing _save(io, data; delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header) end - return CSVFileSaveStream(io, data!==nothing, delim, quotechar, escapechar, nastring, header) + return CSVFileSaveStream(io, data !== nothing, delim, quotechar, escapechar, nastring, header) end -function fileio_savestreaming(s::FileIO.Stream{T}, data=nothing; delim=_delim(T), quotechar='"', escapechar='"', nastring="NA", - header=false) where T <: CSV_or_TSV +function fileio_savestreaming(s::FileIO.Stream{T}, data=nothing; delim=_delim(T), quotechar='"', escapechar='"', nastring="NA", + header=false) where T<:CSV_or_TSV - if data!==nothing + if data !== nothing _save(s.io, data; delim=delim, quotechar=quotechar, escapechar=escapechar, nastring=nastring, header=header) end - - return CSVFileSaveStream(s.io, data!==nothing, delim, quotechar, escapechar, nastring, header) + + return CSVFileSaveStream(s.io, data !== nothing, delim, quotechar, escapechar, nastring, header) end function Base.write(s::CSVFileSaveStream, data) diff --git a/test/runtests.jl b/test/runtests.jl index ef4ddc0..8e99197 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,238 +7,237 @@ using Test @testset "CSVFiles" begin -@testset "basic" begin - array = collect(load(joinpath(@__DIR__, "data.csv"))) - @test length(array) == 3 - @test array == [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)] + @testset "basic" begin + array = collect(load(joinpath(@__DIR__, "data.csv"))) + @test length(array) == 3 + @test array == [(Name="John", Age=34., Children=2), (Name="Sally", Age=54., Children=1), (Name="Jim", Age=23., Children=0)] - output_filename = tempname() * ".csv" + output_filename = tempname() * ".csv" - try - array |> save(output_filename) + try + array |> save(output_filename) - array2 = collect(load(output_filename)) + array2 = collect(load(output_filename)) - @test array == array2 - finally - GC.gc() - rm(output_filename) + @test array == array2 + finally + GC.gc() + rm(output_filename) + end end -end -@testset "traits" begin - csvf = load(joinpath(@__DIR__, "data.csv")) + @testset "traits" begin + csvf = load(joinpath(@__DIR__, "data.csv")) - @test IteratorInterfaceExtensions.isiterable(csvf) == true - @test TableTraits.isiterabletable(csvf) == true - @test TableTraits.supports_get_columns_copy_using_missing(csvf) == true -end + @test IteratorInterfaceExtensions.isiterable(csvf) == true + @test TableTraits.isiterabletable(csvf) == true + @test TableTraits.supports_get_columns_copy_using_missing(csvf) == true + end -@testset "missing values" begin - array3 = [(a=DataValue(3),b="df\"e"),(a=DataValue{Int}(),b="something")] + @testset "missing values" begin + array3 = [(a=DataValue(3), b="df\"e"), (a=DataValue{Int}(), b="something")] - @testset "default" begin - output_filename2 = tempname() * ".csv" + @testset "default" begin + output_filename2 = tempname() * ".csv" - try - array3 |> save(output_filename2) - finally - rm(output_filename2) + try + array3 |> save(output_filename2) + finally + rm(output_filename2) + end end - end - @testset "alternate" begin - output_filename2 = tempname() * ".csv" + @testset "alternate" begin + output_filename2 = tempname() * ".csv" - try - array3 |> save(output_filename2, nastring="") - finally - rm(output_filename2) + try + array3 |> save(output_filename2, nastring="") + finally + rm(output_filename2) + end end end -end - -@testset "Column interface" begin - csvf2 = load(joinpath(@__DIR__, "data.csv")) - @test TableTraits.supports_get_columns_copy_using_missing(csvf2) == true - data = TableTraits.get_columns_copy_using_missing(csvf2) - @test data == (Name=["John", "Sally", "Jim"], Age=[34.,54.,23.], Children=[2,1,0]) -end - -@testset "Less Basic" begin - array = [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)] - - @test_broken false # TODO Reenable download test once FileIO is fixed - # @testset "remote loading" begin - # rem_array = collect(load("https://raw.githubusercontent.com/queryverse/CSVFiles.jl/v0.2.0/test/data.csv")) - # @test length(rem_array) == 3 - # @test rem_array == array - # end - - @testset "can round trip TSV" begin - output_filename3 = tempname() * ".tsv" - - try - array |> save(output_filename3) - - array4 = collect(load(output_filename3)) - @test length(array4) == 3 - @test array4 == array - finally - GC.gc() - rm(output_filename3) - end + + @testset "Column interface" begin + csvf2 = load(joinpath(@__DIR__, "data.csv")) + @test TableTraits.supports_get_columns_copy_using_missing(csvf2) == true + data = TableTraits.get_columns_copy_using_missing(csvf2) + @test data == (Name=["John", "Sally", "Jim"], Age=[34., 54., 23.], Children=[2, 1, 0]) end - - @testset "no quote" begin - output_filename4 = tempname() * ".csv" - try - array |> save(output_filename4, quotechar=nothing) + @testset "Less Basic" begin + array = [(Name="John", Age=34., Children=2), (Name="Sally", Age=54., Children=1), (Name="Jim", Age=23., Children=0)] + + @test_broken false # TODO Reenable download test once FileIO is fixed + # @testset "remote loading" begin + # rem_array = collect(load("https://raw.githubusercontent.com/queryverse/CSVFiles.jl/v0.2.0/test/data.csv")) + # @test length(rem_array) == 3 + # @test rem_array == array + # end + + @testset "can round trip TSV" begin + output_filename3 = tempname() * ".tsv" + + try + array |> save(output_filename3) + + array4 = collect(load(output_filename3)) + @test length(array4) == 3 + @test array4 == array + finally + GC.gc() + rm(output_filename3) + end + end - finally - GC.gc() - rm(output_filename4) + @testset "no quote" begin + output_filename4 = tempname() * ".csv" + + try + array |> save(output_filename4, quotechar=nothing) + + finally + GC.gc() + rm(output_filename4) + end end end -end - -@testset "Streams" begin - data = [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)] - - @testset "CSV" begin - stream = IOBuffer() - mark(stream) - fileiostream = FileIO.Stream{FileIO.format"CSV"}(stream) - save(fileiostream, data) - reset(stream) - mark(stream) - csvstream = load(fileiostream) - reloaded_data = collect(csvstream) - @test IteratorInterfaceExtensions.isiterable(csvstream) - @test TableTraits.isiterabletable(csvstream) - @test TableTraits.supports_get_columns_copy_using_missing(csvstream) - @test reloaded_data == data - - reset(stream) - csvstream = load(fileiostream) - reloaded_data2 = TableTraits.get_columns_copy_using_missing(csvstream) - @test reloaded_data2 == (Name=["John", "Sally", "Jim"], Age=[34., 54., 23.], Children=[2, 1, 0]) - end - @testset "TSV" begin - stream = IOBuffer() - mark(stream) - fileiostream = FileIO.Stream{FileIO.format"TSV"}(stream) - save(fileiostream, data) - reset(stream) - mark(stream) - csvstream = load(fileiostream) - reloaded_data = collect(csvstream) - @test IteratorInterfaceExtensions.isiterable(csvstream) - @test TableTraits.isiterabletable(csvstream) - @test TableTraits.supports_get_columns_copy_using_missing(csvstream) - @test reloaded_data == data - - reset(stream) - csvstream = load(fileiostream) - reloaded_data2 = TableTraits.get_columns_copy_using_missing(csvstream) - @test reloaded_data2 == (Name=["John", "Sally", "Jim"], Age=[34., 54., 23.], Children=[2, 1, 0]) - end -end + @testset "Streams" begin + data = [(Name="John", Age=34., Children=2), (Name="Sally", Age=54., Children=1), (Name="Jim", Age=23., Children=0)] + + @testset "CSV" begin + stream = IOBuffer() + mark(stream) + fileiostream = FileIO.Stream{FileIO.format"CSV"}(stream) + save(fileiostream, data) + reset(stream) + mark(stream) + csvstream = load(fileiostream) + reloaded_data = collect(csvstream) + @test IteratorInterfaceExtensions.isiterable(csvstream) + @test TableTraits.isiterabletable(csvstream) + @test TableTraits.supports_get_columns_copy_using_missing(csvstream) + @test reloaded_data == data -@testset "Compression" begin - data = [(Name="John",Age=34.,Children=2),(Name="Sally",Age=54.,Children=1),(Name="Jim",Age=23.,Children=0)] + reset(stream) + csvstream = load(fileiostream) + reloaded_data2 = TableTraits.get_columns_copy_using_missing(csvstream) + @test reloaded_data2 == (Name=["John", "Sally", "Jim"], Age=[34., 54., 23.], Children=[2, 1, 0]) + end - @testset "CSV" begin - output_filename = "output.csv.gz" - try - save(File{format"CSV"}(output_filename), data) - reloaded_data = collect(load(File{format"CSV"}(output_filename))) + @testset "TSV" begin + stream = IOBuffer() + mark(stream) + fileiostream = FileIO.Stream{FileIO.format"TSV"}(stream) + save(fileiostream, data) + reset(stream) + mark(stream) + csvstream = load(fileiostream) + reloaded_data = collect(csvstream) + @test IteratorInterfaceExtensions.isiterable(csvstream) + @test TableTraits.isiterabletable(csvstream) + @test TableTraits.supports_get_columns_copy_using_missing(csvstream) @test reloaded_data == data - finally - rm(output_filename) + + reset(stream) + csvstream = load(fileiostream) + reloaded_data2 = TableTraits.get_columns_copy_using_missing(csvstream) + @test reloaded_data2 == (Name=["John", "Sally", "Jim"], Age=[34., 54., 23.], Children=[2, 1, 0]) end end - @testset "TSV" begin - output_filename = "output.tsv.gz" - try - save(File{format"TSV"}(output_filename), data) - reloaded_data = collect(load(File{format"TSV"}(output_filename))) - @test reloaded_data == data - finally - rm(output_filename) + @testset "Compression" begin + data = [(Name="John", Age=34., Children=2), (Name="Sally", Age=54., Children=1), (Name="Jim", Age=23., Children=0)] + + @testset "CSV" begin + output_filename = "output.csv.gz" + try + save(File{format"CSV"}(output_filename), data) + reloaded_data = collect(load(File{format"CSV"}(output_filename))) + @test reloaded_data == data + finally + rm(output_filename) + end + end + + @testset "TSV" begin + output_filename = "output.tsv.gz" + try + save(File{format"TSV"}(output_filename), data) + reloaded_data = collect(load(File{format"TSV"}(output_filename))) + @test reloaded_data == data + finally + rm(output_filename) + end end end -end - -@testset "show" begin - x = load(joinpath(@__DIR__, "data.csv")) - - @test sprint(show, x) == """ - 3x3 CSV file - Name │ Age │ Children - ──────┼──────┼───────── - John │ 34.0 │ 2 - Sally │ 54.0 │ 1 - Jim │ 23.0 │ 0 """ - - @test sprint((stream,data)->show(stream, "text/html", data), x) == - "
NameAgeChildren
"John"34.02
"Sally"54.01
"Jim"23.00
" - - @test sprint((stream,data)->show(stream, "application/vnd.dataresource+json", data), x) == - "{\"schema\":{\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"Age\",\"type\":\"number\"},{\"name\":\"Children\",\"type\":\"integer\"}]},\"data\":[{\"Name\":\"John\",\"Age\":34.0,\"Children\":2},{\"Name\":\"Sally\",\"Age\":54.0,\"Children\":1},{\"Name\":\"Jim\",\"Age\":23.0,\"Children\":0}]}" - - @test showable("text/html", x) == true - @test showable("application/vnd.dataresource+json", x) == true - - open("data.csv", "r") do f - x2 = load(Stream{format"CSV"}(f)) - - @test sprint(show, x2) == """ - 3x3 CSV file - Name │ Age │ Children - ──────┼──────┼───────── - John │ 34.0 │ 2 - Sally │ 54.0 │ 1 - Jim │ 23.0 │ 0 """ - - @test sprint((stream,data)->show(stream, "text/html", data), x2) == - "
NameAgeChildren
"John"34.02
"Sally"54.01
"Jim"23.00
" - - @test sprint((stream,data)->show(stream, "application/vnd.dataresource+json", data), x2) == - "{\"schema\":{\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"Age\",\"type\":\"number\"},{\"name\":\"Children\",\"type\":\"integer\"}]},\"data\":[{\"Name\":\"John\",\"Age\":34.0,\"Children\":2},{\"Name\":\"Sally\",\"Age\":54.0,\"Children\":1},{\"Name\":\"Jim\",\"Age\":23.0,\"Children\":0}]}" - - @test showable("text/html", x2) == true - @test showable("application/vnd.dataresource+json", x2) == true + + @testset "show" begin + x = load(joinpath(@__DIR__, "data.csv")) + + @test sprint(show, x) == """ + 3x3 CSV file + Name │ Age │ Children + ──────┼──────┼───────── + John │ 34.0 │ 2 + Sally │ 54.0 │ 1 + Jim │ 23.0 │ 0 """ + + @test sprint((stream, data) -> show(stream, "text/html", data), x) == + "
NameAgeChildren
"John"34.02
"Sally"54.01
"Jim"23.00
" + + @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), x) == + "{\"schema\":{\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"Age\",\"type\":\"number\"},{\"name\":\"Children\",\"type\":\"integer\"}]},\"data\":[{\"Name\":\"John\",\"Age\":34.0,\"Children\":2},{\"Name\":\"Sally\",\"Age\":54.0,\"Children\":1},{\"Name\":\"Jim\",\"Age\":23.0,\"Children\":0}]}" + + @test showable("text/html", x) == true + @test showable("application/vnd.dataresource+json", x) == true + + open("data.csv", "r") do f + x2 = load(Stream{format"CSV"}(f)) + + @test sprint(show, x2) == """ + 3x3 CSV file + Name │ Age │ Children + ──────┼──────┼───────── + John │ 34.0 │ 2 + Sally │ 54.0 │ 1 + Jim │ 23.0 │ 0 """ + + @test sprint((stream, data) -> show(stream, "text/html", data), x2) == + "
NameAgeChildren
"John"34.02
"Sally"54.01
"Jim"23.00
" + + @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), x2) == + "{\"schema\":{\"fields\":[{\"name\":\"Name\",\"type\":\"string\"},{\"name\":\"Age\",\"type\":\"number\"},{\"name\":\"Children\",\"type\":\"integer\"}]},\"data\":[{\"Name\":\"John\",\"Age\":34.0,\"Children\":2},{\"Name\":\"Sally\",\"Age\":54.0,\"Children\":1},{\"Name\":\"Jim\",\"Age\":23.0,\"Children\":0}]}" + + @test showable("text/html", x2) == true + @test showable("application/vnd.dataresource+json", x2) == true + end + end - -end - -@testset "savestreaming" begin - using DataFrames - - df = DataFrame(A = 1:2:1000, B = repeat(1:10, inner=50), C = 1:500) - df1 = df[1:5, :] - df2 = df[6:10, :] - - # Test both csv and tsv formats - for ext in ("csv", "tsv") - fname = "output.$ext" - s = savestreaming(fname, df1) - write(s, df2) - write(s, df2) # add this slice twice - close(s) - - new_df = DataFrame(load(fname)) - @test new_df[1:5,:] == df1 - @test new_df[6:10,:] == df2 - @test new_df[11:15,:] == df2 - - rm(fname) + + @testset "savestreaming" begin + using DataFrames + + df = DataFrame(A=1:2:1000, B=repeat(1:10, inner=50), C=1:500) + df1 = df[1:5, :] + df2 = df[6:10, :] + + # Test both csv and tsv formats + for ext in ("csv", "tsv") + fname = "output.$ext" + s = savestreaming(fname, df1) + write(s, df2) + write(s, df2) # add this slice twice + close(s) + + new_df = DataFrame(load(fname)) + @test new_df[1:5, :] == df1 + @test new_df[6:10, :] == df2 + @test new_df[11:15, :] == df2 + + rm(fname) + end end -end end # Outer-most testset -