Skip to content

Commit

Permalink
feat: add scanner interface
Browse files Browse the repository at this point in the history
  • Loading branch information
TuSKan authored and twpayne committed Feb 6, 2024
1 parent d42f4cc commit 53250cb
Show file tree
Hide file tree
Showing 12 changed files with 1,352 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ jobs:
- name: build
run: go build ./...
- name: test
run: go test ./...
run: go test ./...
44 changes: 44 additions & 0 deletions cpg.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package shapefile

import (
"archive/zip"
"fmt"
"io"
"strings"

"golang.org/x/net/html/charset"
)

// A CPG holds the content of a .cpg file: the name of a character set.
type CPG struct {
// Charset is the character set name. ReadCPG fills it with the name
// returned by charset.Lookup for the file's content.
Charset string
}

// ReadCPG reads a CPG from an io.Reader.
// ReadCPG reads all of r, treats the content as a character set label, and
// returns a CPG carrying the name that charset.Lookup reports for that label.
// The second argument is accepted but not used. An error is returned if
// reading fails or if charset.Lookup does not recognise the label.
func ReadCPG(r io.Reader, _ int64) (*CPG, error) {
	raw, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}

	label := string(raw)
	// charset.Lookup signals an unrecognised label with a nil encoding.
	if enc, canonical := charset.Lookup(strings.ToLower(label)); enc != nil {
		return &CPG{Charset: canonical}, nil
	}
	return nil, fmt.Errorf("unknown charset '%s'", label)
}

// ReadCPGZipFile reads a CPG from a *zip.File.
// ReadCPGZipFile opens zipFile and reads a CPG from its content via ReadCPG.
// An error from opening the entry is returned unchanged; an error from
// ReadCPG is wrapped with the entry's name as a prefix.
func ReadCPGZipFile(zipFile *zip.File) (*CPG, error) {
	rc, err := zipFile.Open()
	if err != nil {
		return nil, err
	}
	defer rc.Close()

	size := int64(zipFile.UncompressedSize64)
	result, readErr := ReadCPG(rc, size)
	if readErr != nil {
		return nil, fmt.Errorf("%s: %w", zipFile.Name, readErr)
	}
	return result, nil
}
42 changes: 28 additions & 14 deletions dbf.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package shapefile

// FIXME support dBase version 7 files if needed, see https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm
// FIXME work through https://www.clicketyclick.dk/databases/xbase/format/dbf.html and add any missing features
// FIXME add unmarshaller that unmarshals a record into a Go struct with `dbf:"..."` tags?s
// FIXME add unmarshaller that unmarshalls a record into a Go struct with `dbf:"..."` tags?
// FIXME validate logical implementation
// FIXME add support for memos

Expand All @@ -17,6 +17,8 @@ import (
"strings"
"time"

"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
)

Expand All @@ -27,12 +29,12 @@ const (

var (
knownFieldTypes = map[byte]struct{}{
'C': {},
'D': {},
'F': {},
'L': {},
'M': {},
'N': {},
'C': {}, // Character
'D': {}, // Date
'F': {}, // Floating point binary numeric
'L': {}, // Logical
'M': {}, // Memo
'N': {}, // Numeric
}

knownLogicalValues = map[byte]any{
Expand All @@ -46,8 +48,6 @@ var (
't': true,
'y': true,
}

iso8859_1Decoder = charmap.ISO8859_1.NewDecoder()
)

// A DBFHeader is a DBF header.
Expand Down Expand Up @@ -86,6 +86,7 @@ type ReadDBFOptions struct {
MaxHeaderSize int
MaxRecordSize int
MaxRecords int
Charset string
}

// A DBFMemo is a DBF memo.
Expand Down Expand Up @@ -145,6 +146,16 @@ func ReadDBF(r io.Reader, _ int64, options *ReadDBFOptions) (*DBF, error) {
return nil, errors.New("invalid total length of fields")
}

var decoder *encoding.Decoder
if options != nil && options.Charset != "" {
enc, _ := charset.Lookup(options.Charset)
if enc == nil {
return nil, fmt.Errorf("unknown charset '%s'", options.Charset)
}
decoder = enc.NewDecoder()
} else {
decoder = charmap.ISO8859_1.NewDecoder()
}
records := make([][]any, 0, header.Records)
for i := 0; i < header.Records; i++ {
recordData := make([]byte, header.RecordSize)
Expand All @@ -158,7 +169,7 @@ func ReadDBF(r io.Reader, _ int64, options *ReadDBFOptions) (*DBF, error) {
for _, fieldDescriptor := range fieldDescriptors {
fieldData := recordData[offset : offset+fieldDescriptor.Length]
offset += fieldDescriptor.Length
field, err := fieldDescriptor.ParseRecord(fieldData)
field, err := fieldDescriptor.ParseRecord(fieldData, decoder)
if err != nil {
return nil, fmt.Errorf("field %s: %w", fieldDescriptor.Name, err)
}
Expand Down Expand Up @@ -267,10 +278,10 @@ func (d *DBF) Record(i int) map[string]any {
}

// ParseRecord parses a record from data.
func (d *DBFFieldDescriptor) ParseRecord(data []byte) (any, error) {
func (d *DBFFieldDescriptor) ParseRecord(data []byte, decoder *encoding.Decoder) (any, error) {
switch d.Type {
case 'C':
return parseCharacter(data)
return parseCharacter(data, decoder)
case 'D':
return parseDate(data)
case 'F':
Expand All @@ -296,8 +307,11 @@ func TrimTrailingZeros(data []byte) []byte {
return nil
}

func parseCharacter(data []byte) (string, error) {
return iso8859_1Decoder.String(string(bytes.TrimSpace(TrimTrailingZeros(data))))
func parseCharacter(data []byte, decoder *encoding.Decoder) (string, error) {
if decoder == nil {
return "", fmt.Errorf("decoder is nil")
}
return decoder.String(string(bytes.TrimSpace(TrimTrailingZeros(data))))
}

func parseDate(data []byte) (time.Time, error) {
Expand Down
2 changes: 1 addition & 1 deletion dbf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
func FuzzReadDBF(f *testing.F) {
require.NoError(f, addFuzzDataFromFS(f, os.DirFS("."), "testdata", ".dbf"))

f.Fuzz(func(t *testing.T, data []byte) {
f.Fuzz(func(_ *testing.T, data []byte) {
r := bytes.NewReader(data)
_, _ = ReadDBF(r, int64(len(data)), &ReadDBFOptions{
MaxHeaderSize: 4096,
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ go 1.19
require (
github.com/stretchr/testify v1.8.1
github.com/twpayne/go-geom v1.4.4
golang.org/x/net v0.0.0-20220722155237-a158d28d115b
golang.org/x/text v0.4.0
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKs
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/twpayne/go-geom v1.4.4 h1:bcCPAvvNSzjmpUqR0Uqh39ClCKtPx6kZVR7EakQaVJI=
github.com/twpayne/go-geom v1.4.4/go.mod h1:Kz4sX4LtdesDQgkhsMERazLlH/NiCg90s6FPaNr0KNI=
golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc h1:ao2WRsKSzW6KuUY9IWPwWahcHCgR0s52IfwutMfEbdM=
golang.org/x/exp v0.0.0-20240103183307-be819d1f06fc/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
Expand Down
Loading

0 comments on commit 53250cb

Please sign in to comment.