From 6f134a473132b4a313cae6c6ca44caec62962a23 Mon Sep 17 00:00:00 2001 From: m-wells Date: Tue, 11 Feb 2020 20:22:14 -0500 Subject: [PATCH] fixed bug with string data io --- Project.toml | 2 +- src/AlignedBinaryFormat.jl | 92 +++++++++++++++--------------------- src/abffile.jl | 61 ++++-------------------- src/endian.jl | 28 +++++++++++ src/read_write.jl | 95 +++++++++++++++++++++++++++----------- src/showio.jl | 42 +++++++++++++++++ test/runtests.jl | 3 ++ 7 files changed, 188 insertions(+), 135 deletions(-) create mode 100644 src/endian.jl create mode 100644 src/showio.jl diff --git a/Project.toml b/Project.toml index 9c2059e..86f88e0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "AlignedBinaryFormat" uuid = "94fc9360-1a5e-4d84-93be-ddbadb32b3a7" authors = ["m-wells "] -version = "0.4.0" +version = "0.4.1" [deps] Mmap = "a63ad114-7e13-5084-954f-fe012c677804" diff --git a/src/AlignedBinaryFormat.jl b/src/AlignedBinaryFormat.jl index 33bb762..e9c39b6 100644 --- a/src/AlignedBinaryFormat.jl +++ b/src/AlignedBinaryFormat.jl @@ -3,6 +3,30 @@ using Mmap export abfopen +#--------------------------------------------------------------------------------------------------- + +struct ReadOnlyError <: Exception + io::IOStream +end + +function Base.showerror(io::IO, e::ReadOnlyError) + print(io, "isreadable(", e.io.name, ") = ", isreadable(io)) +end + +check_readable(io::IOStream) = isreadable(io) || throw(ReadOnlyError(io)) + +struct WriteOnlyError <: Exception + io::IOStream +end + +function Base.showerror(io::IO, e::WriteOnlyError) + print(io, "iswritable(", e.io.name, ") = ", iswritable(io)) +end + +check_writable(io::IOStream) = iswritable(io) || throw(WriteOnlyError(io)) + +#--------------------------------------------------------------------------------------------------- + function ImmutableDict(x::Pair, ys::Vararg{Pair}) d = Base.ImmutableDict(x) for y in ys @@ -41,52 +65,7 @@ const ARRAYLOOKUP = ImmutableDict(Base.ImmutableDict{String,Any}(), string(BitArray) => BitArray, string(String) => String) -function write_str(io::IOStream, str::Vararg{String}) - for s in str - write(io, Int64(length(s))) - write(io, s) - end - nothing -end - -function read_str(io::IOStream, n::Int) - k = Vector{Char}(undef,n) - @inbounds for i in 1:n - k[i] = read(io, Char) - end - return join(k) -end - -read_str(io::IOStream) = read_str(io, read(io, Int64)) - -const LIT_ENDIAN = 0x04030201 -const BIG_ENDIAN = 0x01020304 - -const LIT_ENDFLAG = UInt8(0) -const BIG_ENDFLAG = UInt8(255) - -function write_endian(io::IOStream) - if Base.ENDIAN_BOM == LIT_ENDIAN - write(io, LIT_ENDFLAG) - elseif Base.ENDIAN_BOM == BIG_ENDIAN - write(io, BIG_ENDFLAG) - else - error("ENDIAN_BOM of ", Base.ENDIAN_BOM, " not recognized") - end -end - -function read_endian(io::IOStream) - endflag = read(io, UInt8) - if endflag == LIT_ENDFLAG - endian = LIT_ENDIAN - elseif endflag == BIG_ENDFLAG - endian = BIG_ENDIAN - else - error("ENDIAN FLAG of ", endflag, " not recognized") - end - endian == Base.ENDIAN_BOM || error("endian does not match machine endian") - return endian -end +#--------------------------------------------------------------------------------------------------- ## from https://github.com/JuliaLang/julia/blob/master/base/bitarray.jl (2020/01/20) # notes: bits are stored in contiguous chunks @@ -102,9 +81,10 @@ function _sizeof(::Type{BitArray{N}}, sz::NTuple{N,Int64}) where N end _sizeof(::Type{A}, sz::NTuple{N,Int64}) where {T,N,A<:AbstractArray{T,N}} = sizeof(T)*prod(sz) -_sizeof(::Type{String}, sz::Tuple{Int64}) = sizeof(Char)*first(sz) +_sizeof(::Type{String}, n::Int64) = sizeof(Char)*n +_sizeof(s::String) = length(s)*sizeof(Char) -#_sizeof(x::AbstractArray) = _sizeof(typeof(x), size(x)) +#--------------------------------------------------------------------------------------------------- struct AbfKey{N} pos::Int64 @@ -113,17 +93,21 @@ struct AbfKey{N} # used when writing AbfKey(pos::Int64, x::A) where A<:AbstractArray = new{ndims(A)}(pos, A, size(x)) + AbfKey(io::IOStream, x::AbstractString) = new{1}(position(io), String, (1,)) # used when reading - AbfKey(io::IOStream, ::Type{A}, dims::NTuple{N,Int64}) where {A,N} = new{N}(position(io), A, dims) - - # used when writing - AbfKey(io::IOStream, x::AbstractString) = new{1}(position(io), String, (length(x),)) + AbfKey(io::IOStream, ::Type{A}, dims::NTuple{N,Int64} + ) where {T,N,A<:AbstractArray{T,N}} = new{N}(position(io), A, dims) + AbfKey(io::IOStream, ::Type{String}) = new{1}(position(io), String, (1,)) end Base.show(io::IO, a::AbfKey) = print(io, a.T, a.dims) -include("read_write.jl") -include("abffile.jl") +#--------------------------------------------------------------------------------------------------- + +include("./endian.jl") +include("./read_write.jl") +include("./abffile.jl") +include("./showio.jl") end diff --git a/src/abffile.jl b/src/abffile.jl index f9065ff..0345858 100644 --- a/src/abffile.jl +++ b/src/abffile.jl @@ -1,15 +1,15 @@ mutable struct AbfFile io::IOStream - rw::String abfkeys::Base.ImmutableDict{String,AbfKey} loaded::Base.ImmutableDict{String,Union{Array,BitArray,String}} - function AbfFile(filename::String, rw::String) - io = open(filename, rw) + function AbfFile(io::IOStream) abfkeys=Base.ImmutableDict{String,AbfKey}() loaded=Base.ImmutableDict{String,Union{Array,BitArray,String}}() - new(io, rw, abfkeys, loaded) + new(io, abfkeys, loaded) end + + AbfFile(filename::String, rw::String) = AbfFile(open(filename, rw)) end Base.keys(abf::AbfFile) = keys(abf.abfkeys) @@ -32,7 +32,6 @@ function abfopen(filename::String, rw::String) end function Base.close(abf::AbfFile) - abf.rw = "closed" abf.abfkeys = Base.ImmutableDict{String,AbfKey}() abf.loaded = Base.ImmutableDict{String,Union{Array,BitArray,String}}() close(abf.io) @@ -49,6 +48,7 @@ function abfopen(f::Function, args...) end function Base.write(abf::AbfFile, k::String, x) + check_writable(abf.io) seekend(abf.io) abfkey = _write(abf.io::IOStream, k, x) addabfkey!(abf, k, abfkey) @@ -56,12 +56,12 @@ function Base.write(abf::AbfFile, k::String, x) end function Base.read(abf::AbfFile, k::String) - isreadable(abf.io) || error("file is not readable, opened with: ", abf.rw) + check_readable(abf.io) k ∈ keys(abf.loaded) && return abf.loaded[k] abfkey = abf.abfkeys[k] seek(abf.io, abfkey.pos) if abfkey.T == String - x = read_str(abf.io, first(abfkey.dims)) + x = read_str(abf.io) else x = Mmap.mmap(abf.io, abfkey.T, abfkey.dims) end @@ -73,53 +73,8 @@ Base.getindex(abf::AbfFile, k::String) = read(abf, k) function Base.setindex!(abf::AbfFile, v, k::String) if k ∈ keys(abf) - error("cannot overwrite exists key. You may have wanted to do \"abf[", k, + error("cannot overwrite existing key. You may have wanted to do \"abf[", k, "] .= x\" instead (element assignment)") end write(abf, k, v) end - -#--------------------------------------------------------------------------------------------------- - -function cpad(str, n::Int) - nspace = (n - length(str))/2 - repeat(" ", floor(Int, nspace))*str*repeat(" ", ceil(Int, nspace)) -end - -function Base.show(io::IO, abf::T) where T<:AbfFile - println(io, T, "(", abf.rw, " ", abf.io.name, ")") - - ktitle = "label" - ttitle = "type" - ltitle = "status" - keypad = length(ktitle) - typepad = length(ttitle) - loadpad = length(ltitle) - - for (k,t) in abf.abfkeys - keypad = max(keypad, length(string(k))) - typepad = max(typepad, length(string(t))) - if k in keys(abf.loaded) - loadpad = max(loadpad, length("loaded")) - else - loadpad = max(loadpad, length("not loaded")) - end - end - - indent = "" - - println(io, indent, "┌─", repeat('─', keypad) , "─┬─", repeat('─', typepad), "─┬─", repeat('─', loadpad), "─┐") - println(io, indent, "│ ", cpad(ktitle, keypad), " │ ", cpad(ttitle, typepad), " │ ", cpad(ltitle, loadpad), " │") - println(io, indent, "├─", repeat('─', keypad) , "─┼─", repeat('─', typepad), "─┼─", repeat('─', loadpad), "─┤") - - for (k,t) in sort(collect(abf.abfkeys), by=first) - print(io,indent, "│ ", cpad(k, keypad), " │ ", cpad(string(t), typepad), " │ ") - if k in keys(abf.loaded) - println(io, cpad("loaded", loadpad), " │") - else - println(io, cpad("not loaded", loadpad), " │") - end - end - print(io, indent, "└─", repeat('─', keypad) , "─┴─", repeat('─', typepad), "─┴─", repeat('─', loadpad), "─┘") -end - diff --git a/src/endian.jl b/src/endian.jl new file mode 100644 index 0000000..a6bb0b3 --- /dev/null +++ b/src/endian.jl @@ -0,0 +1,28 @@ +const LIT_ENDIAN = 0x04030201 +const BIG_ENDIAN = 0x01020304 + +const LIT_ENDFLAG = UInt8(0) +const BIG_ENDFLAG = UInt8(255) + +function write_endian(io::IOStream) + if Base.ENDIAN_BOM == LIT_ENDIAN + write(io, LIT_ENDFLAG) + elseif Base.ENDIAN_BOM == BIG_ENDIAN + write(io, BIG_ENDFLAG) + else + error("ENDIAN_BOM of ", Base.ENDIAN_BOM, " not recognized") + end +end + +function read_endian(io::IOStream) + endflag = read(io, UInt8) + if endflag == LIT_ENDFLAG + endian = LIT_ENDIAN + elseif endflag == BIG_ENDFLAG + endian = BIG_ENDIAN + else + error("ENDIAN FLAG of ", UInt8(endflag), " not recognized") + end + endian == Base.ENDIAN_BOM || error("endian does not match machine endian") + return endian +end diff --git a/src/read_write.jl b/src/read_write.jl index 6561c63..5b6e9b3 100644 --- a/src/read_write.jl +++ b/src/read_write.jl @@ -1,10 +1,53 @@ +write_size(io::IOStream, x::AbstractArray) = write(io, Int64.(size(x))...) +write_size(io::IOStream, x::AbstractString) = write(io, Int64(length(x))) + +function read_size(io::IOStream, data::Type{A}) where {T,N,A<:AbstractArray{T,N}} + ntuple(i -> read(io, Int64), Val(N)) +end +read_size(io::IOStream, ::Type{String}) = read(io, Int64) + +#--------------------------------------------------------------------------------------------------- + +function _write_str(io::IOStream, str::String) + for s in str + write(io, Char(s)) + end +end + +""" + write_str(io, str) + +Write out an Int64 indicating length of string then the characters of the string +""" +function write_str(io::IOStream, str::String) + write_size(io, str) + _write_str(io, str) + nothing +end + +""" + read_str(io, n::Int) + +Read in `n` characters and return the combined String +""" +function read_str(io::IOStream, n::Int = read_size(io::IOStream, String)) + k = Vector{Char}(undef,n) + @inbounds for i in 1:n + k[i] = read(io, Char) + end + return join(k) +end + +#--------------------------------------------------------------------------------------------------- + function write_type(io::IOStream, data::BitArray) write_str(io, string(BitArray)) write(io, Int64(ndims(data))) end function write_type(io::IOStream, data::AbstractArray) - write_str(io, string(Array), string(eltype(data))) + write_str(io, string(Array)) + write_str(io, string(eltype(data))) write(io, Int64(ndims(data))) end @@ -17,15 +60,6 @@ read_type(io::IOStream) = read_type(io, ARRAYLOOKUP[read_str(io)]) #--------------------------------------------------------------------------------------------------- -write_size(io::IOStream, x::AbstractArray) = write(io, Int64.(size(x))...) - -function read_size(io::IOStream, data::Type{A}) where {T,N,A<:AbstractArray{T,N}} - ntuple(i -> read(io, Int64), Val(N)) -end -read_size(io::IOStream, str::Type{String}) = (read(io, Int64),) - -#--------------------------------------------------------------------------------------------------- - """ nbytes is the number of bytes to align too """ @@ -48,7 +82,6 @@ align(io::IOStream, ::Type{String}) = nothing align(io::IOStream, ::A) where A<:AbstractArray = align(io,A) #--------------------------------------------------------------------------------------------------- - function _write(io::IOStream, label::String, data::A) where A<:Union{Array,BitArray} write_endian(io) write_str(io, label) @@ -62,28 +95,36 @@ function _write(io::IOStream, label::String, data::A) where A<:Union{Array,BitAr return abfkey end -# everything but actually mmaping -# returns AbfKey -function _read(io::IOStream) - endian = read_endian(io) - label = read_str(io) - - type = read_type(io) - dims = read_size(io, type) - - align(io, type) - abfkey = AbfKey(io, type, dims) - skip(io, _sizeof(type, dims)) - return (label, abfkey) -end - function _write(io::IOStream, label::String, str::AbstractString) write_endian(io) write_str(io, label) write_type(io, str) - abfkey = AbfKey(io, str) write_str(io, str) + + return abfkey +end + +function _read(io::IOStream, ::Type{String}) + abfkey = AbfKey(io, String) + read_str(io) + return abfkey +end + +function _read(io::IOStream, type::Type{A}) where A<:AbstractArray + dims = read_size(io, type) + align(io, type) + abfkey = AbfKey(io, type, dims) + skip(io, _sizeof(type, dims)) return abfkey end + +function _read(io::IOStream) + endian = read_endian(io) + label = read_str(io) + + type = read_type(io) + abfkey = _read(io, type) + return (label, abfkey) +end diff --git a/src/showio.jl b/src/showio.jl new file mode 100644 index 0000000..eb7e0d5 --- /dev/null +++ b/src/showio.jl @@ -0,0 +1,42 @@ +function cpad(str, n::Int) + nspace = (n - length(str))/2 + repeat(" ", floor(Int, nspace))*str*repeat(" ", ceil(Int, nspace)) +end + +function Base.show(io::IO, abf::T) where T<:AbfFile + println(io, T, "(", abf.rw, " ", abf.io.name, ")") + + ktitle = "label" + ttitle = "type" + ltitle = "status" + keypad = length(ktitle) + typepad = length(ttitle) + loadpad = length(ltitle) + + for (k,t) in abf.abfkeys + keypad = max(keypad, length(string(k))) + typepad = max(typepad, length(string(t))) + if k in keys(abf.loaded) + loadpad = max(loadpad, length("loaded")) + else + loadpad = max(loadpad, length("not loaded")) + end + end + + indent = "" + + println(io, indent, "┌─", repeat('─', keypad) , "─┬─", repeat('─', typepad), "─┬─", repeat('─', loadpad), "─┐") + println(io, indent, "│ ", cpad(ktitle, keypad), " │ ", cpad(ttitle, typepad), " │ ", cpad(ltitle, loadpad), " │") + println(io, indent, "├─", repeat('─', keypad) , "─┼─", repeat('─', typepad), "─┼─", repeat('─', loadpad), "─┤") + + for (k,t) in sort(collect(abf.abfkeys), by=first) + print(io,indent, "│ ", cpad(k, keypad), " │ ", cpad(string(t), typepad), " │ ") + if k in keys(abf.loaded) + println(io, cpad("loaded", loadpad), " │") + else + println(io, cpad("not loaded", loadpad), " │") + end + end + print(io, indent, "└─", repeat('─', keypad) , "─┴─", repeat('─', typepad), "─┴─", repeat('─', loadpad), "─┘") +end + diff --git a/test/runtests.jl b/test/runtests.jl index c970fa4..f1ea933 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -123,13 +123,16 @@ end temp = tempname() try blah = join(rand(Char,10))*join(rand(Char,9)) + x = rand(20,4) abfopen(temp, "w+") do abf write(abf, "blah", blah) + abf["x"] = x end abfopen(temp, "r") do abf _blah = read(abf, "blah") @test blah == _blah + @test x == abf["x"] end finally