Skip to content

Commit 2639085

Browse files
authored
Merge pull request #27 from jaakkor2/compactsubtype
Implement reading of compact subtype character format
2 parents 5b151d4 + 7478dfd commit 2639085

File tree

7 files changed

+56
-16
lines changed

7 files changed

+56
-16
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "JMPReader"
22
uuid = "d9f7e686-cf87-4d12-8d7a-0e9b8c9fba29"
33
authors = ["Jaakko Ruohio <[email protected]>"]
4-
version = "0.1.15"
4+
version = "0.1.16"
55

66
[deps]
77
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"

docs/src/dev.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@ For example,
1111
```julia
1212
JMPReader.scandir(joinpath(pathof(JMPReader), "..", "..", "test"))
1313
```
14-
reads 12 JMP-files, and
14+
reads 12 JMP-files,
1515
```julia
1616
JMPReader.scandir(raw"C:\Program Files\SAS\JMPPRO\17\Samples\Data")
1717
```
18-
reads successfully 605 JMP-files.
18+
successfully reads 605 JMP-files, and
19+
```julia
20+
JMPReader.scandir(raw"C:\Program Files\JMP\JMPPRO\18\Samples\Data")
21+
```
22+
successfully reads 612 JMP-files.
1923

2024
## Looking into the binary .jmp file
2125

src/column.jl

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ function column_data(io, info, i::Int, deflatebuffer::Vector{UInt8})
132132
end
133133

134134
# character
135-
if dt1 in [0x02, 0x09] && dt2 in [0x01, 0x02]
135+
if dt1 in [0x02, 0x09] && dt2 in [0x01, 0x02]
136136

137137
# constant width
138138
if ([dt3, dt4] == [0x00, 0x00] && dt5 > 0) ||
@@ -146,15 +146,37 @@ function column_data(io, info, i::Int, deflatebuffer::Vector{UInt8})
146146
# variable width
147147
if [dt3, dt4, dt5] == [0x00, 0x00, 0x00]
148148
if dt1 == 0x09 # compressed
149-
widthbytes = a[9]
150-
if widthbytes == 1
151-
widths = reinterpret(Int8, @view a[13 .+ (1:info.nrows)])
152-
data = a[13 + info.nrows + 1:end]
153-
elseif widthbytes == 2
154-
widths = reinterpret(Int16, @view a[13 .+ (1:2*info.nrows)])
155-
data = a[13 + 2*info.nrows + 1:end]
149+
if reinterpret(Int64, a[1:8])[1] == length(a) # pooled data
150+
io2 = IOBuffer(a)
151+
reclen = read(io2, Int64)
152+
foo = read(io2, 9)
153+
foo2 = read(io2, Int64)
154+
# indices to pool
155+
wb = read(io2, Int8)
156+
T = wb == 1 ? Int8 : Int16
157+
idx = [read(io2, T) for _ in 1:info.nrows]
158+
# pool
159+
npools = maximum(idx)
160+
wb = read(io2, Int8)
161+
T = wb == 1 ? Int8 : wb == 2 ? Int16 : error()
162+
pool = []
163+
for i = 1:npools
164+
n = read(io2, T)
165+
push!(pool, String(read(io2, n)))
166+
end
167+
str = [idx == 0 ? "" : pool[idx] for idx in idx]
168+
return str
156169
else
157-
throw(ErrorException("Unknown `widthbytes=$widthbytes`, some offset is wrong somewhere, column i=$i"))
170+
widthbytes = a[9]
171+
if widthbytes == 1
172+
widths = reinterpret(Int8, @view a[13 .+ (1:info.nrows)])
173+
data = a[13 + info.nrows + 1:end]
174+
elseif widthbytes == 2
175+
widths = reinterpret(Int16, @view a[13 .+ (1:2*info.nrows)])
176+
data = a[13 + 2*info.nrows + 1:end]
177+
else
178+
throw(ErrorException("Unknown `widthbytes=$widthbytes`, some offset is wrong somewhere, column i=$i"))
179+
end
158180
end
159181
else # uncompressed
160182
# continue after dt1,...,dt6 were read

src/constants.jl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ const GZIP_SECTION_START = [0xef, 0xbe, 0xfe, 0xca] # cafebeef
88
# JMP uses 1904 date system
99
const JMP_STARTDATE = DateTime(1904, 1, 1)
1010

11-
# offset for number of rows
12-
const OFFSET_NROWS = 368
13-
1411
# row state
1512
const rowstatemarkers = [
1613
'', '+', 'X', '',

src/metadata.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
function metadata(io)
2-
seek(io, OFFSET_NROWS)
2+
seekstart(io)
3+
seq = [0x07, 0x00, 0x08, 0x00, 0x00, 0x00]
4+
readuntil(io, seq)
5+
seek(io, position(io) - 38)
6+
37
nrows = read(io, Int64)
48
ncols = read(io, Int32)
59
foo1 = read_reals(io, Int16, 5) # ??

test/compact.jmp

2.04 KB
Binary file not shown.

test/runtests.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,4 +129,17 @@ end
129129
@test df.rowstate3[2].marker == ''
130130
@test df.rowstate3[3].marker == ''
131131
@test df.rowstate2[3].color == RGB{N0f8}(0.753,0.753,0.753)
132+
end
133+
134+
@testset "compact subtype" begin
135+
df = readjmp("compact.jmp")
136+
data = ["aa", "b", "ccc", "dd", "dd"]
137+
@test df.normalsubtype == data
138+
@test df.compactsubtype == data
139+
@test df.longcompact[1] == repeat('x', 254)
140+
@test df.longcompact[2] == repeat('y', 255)
141+
@test isempty(df.longcompact[3])
142+
z = repeat('z', 256)
143+
@test df.longcompact[4] == z
144+
@test df.longcompact[5] == z
132145
end

0 commit comments

Comments
 (0)