Skip to content

Commit

Permalink
Merge pull request #828 from tealeg/sync-pools-for-ssts
Browse files Browse the repository at this point in the history
Use sync pools for shared string parser
  • Loading branch information
tealeg authored Nov 3, 2024
2 parents faa27f6 + 775f9cf commit 3476977
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 96 deletions.
9 changes: 5 additions & 4 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ func autoFilterDefinedName(sheet *Sheet, sheetIndex int) (*xlsxDefinedName, erro
// representing the file in terms of the structure of an XLSX file.
func (f *File) MakeStreamParts() (map[string]string, error) {
var parts map[string]string
var refTable *RefTable = NewSharedStringRefTable(10000) // 10000 is arbitrary
var refTable *RefTable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
refTable.isWrite = true
var workbookRels WorkBookRels = make(WorkBookRels)
var err error
Expand Down Expand Up @@ -465,7 +465,7 @@ func (f *File) MakeStreamParts() (map[string]string, error) {
// MarshallParts constructs a map of file name to XML content representing the file
// in terms of the structure of an XLSX file.
func (f *File) MarshallParts(zipWriter *zip.Writer) error {
var refTable *RefTable = NewSharedStringRefTable(10000) // 10000 is arbitrary
var refTable *RefTable = NewSharedStringRefTable(DEFAULT_REFTABLE_SIZE)
refTable.isWrite = true
var workbookRels WorkBookRels = make(WorkBookRels)
var err error
Expand Down Expand Up @@ -650,9 +650,10 @@ func (f *File) MarshallParts(zipWriter *zip.Writer) error {
// Here, value would be set to the raw value of the cell A1 in the
// first sheet in the XLSX file.
func (f *File) ToSlice() (output [][][]string, err error) {
output = [][][]string{}
sheetCount := len(f.Sheets)
output = make([][][]string, 0, sheetCount)
for _, sheet := range f.Sheets {
s := [][]string{}
s := make([][]string, 0, sheet.MaxRow)
err := sheet.ForEachRow(func(row *Row) error {
r := []string{}
err := row.ForEachCell(func(cell *Cell) error {
Expand Down
126 changes: 114 additions & 12 deletions lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"runtime/debug"
"strconv"
"strings"
"sync"
)

const (
Expand All @@ -22,6 +23,26 @@ const (
externalSheetBangChar = "!"
)

// Pools of reusable values of XML-related types. Each pool's New
// function hands back a pointer to a fresh zero value of the pooled
// type; callers type-assert the result of Get.
var (
	// tokPool supplies *xml.StartElement values.
	tokPool = sync.Pool{
		New: func() interface{} { return new(xml.StartElement) },
	}

	// xlsxSIPool supplies *xlsxSI values.
	xlsxSIPool = sync.Pool{
		New: func() interface{} { return new(xlsxSI) },
	}

	// xmlAttrPool supplies *xml.Attr values.
	xmlAttrPool = sync.Pool{
		New: func() interface{} { return new(xml.Attr) },
	}
)

// XLSXReaderError is the standard error type for otherwise undefined
// errors in the XSLX reading process.
type XLSXReaderError struct {
Expand Down Expand Up @@ -845,15 +866,104 @@ func readSheetsFromZipFile(f *zip.File, file *File, sheetXMLMap map[string]strin
return sheetsByName, sheets, err
}

// readSharedStrings streams a shared-strings XML document from rc and
// builds a RefTable from it one <si> element at a time, rather than
// unmarshalling the whole <sst> tree in a single step.
//
// The <sst> start element creates the table. When it carries a
// non-negative integer "count" attribute that value is passed to
// NewSharedStringRefTable as the size; otherwise DEFAULT_REFTABLE_SIZE
// is used. Each <si> element is decoded into a pooled xlsxSI and added
// to the table as rich text (when it contains runs) or as a plain
// string.
//
// An error, wrapped with the function name, is returned when the XML
// cannot be tokenised or decoded, when "count" is not an integer, when
// an <si> element appears before <sst>, or when the document contains
// no <sst> element at all.
func readSharedStrings(rc io.Reader) (*RefTable, error) {
	wrap := func(err error) (*RefTable, error) {
		return nil, fmt.Errorf("readSharedStrings: %w", err)
	}

	var reftable *RefTable
	decoder := xml.NewDecoder(rc)

	for {
		// The decoder allocates the tokens it returns, so they are
		// used directly rather than being routed through a pool.
		tok, err := decoder.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Token returns a nil token together with any error, so
			// the error has to be inspected before the token is;
			// otherwise malformed input would look like a clean end
			// of stream.
			return wrap(err)
		}
		if tok == nil {
			// Defensive: never loop on a nil token without an error.
			break
		}

		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		switch start.Name.Local {
		case "sst":
			// With no usable hint on the size, start with a decent
			// number of entries to avoid small allocs.
			size := DEFAULT_REFTABLE_SIZE
			for i := range start.Attr {
				if start.Attr[i].Name.Local != "count" {
					continue
				}
				count, convErr := strconv.Atoi(start.Attr[i].Value)
				if convErr != nil {
					return wrap(convErr)
				}
				// A negative count is meaningless as a size, so it is
				// treated the same as a missing attribute.
				if count >= 0 {
					size = count
				}
				break
			}
			reftable = NewSharedStringRefTable(size)
			reftable.isWrite = false // TODO: do we actually use this?

		case "si":
			if reftable == nil {
				return wrap(fmt.Errorf("si encountered before reftable created"))
			}
			si := xlsxSIPool.Get().(*xlsxSI)
			decErr := decoder.DecodeElement(si, &start)
			if decErr == nil {
				if len(si.R) > 0 {
					reftable.AddRichText(xmlToRichText(si.R))
				} else {
					reftable.AddString(si.T.getText())
				}
			}
			// Always wipe the value before handing it back to the
			// pool, on the error path too: a stale or half-decoded
			// entry would otherwise leak into the next <si> that
			// reuses it.
			*si = xlsxSI{}
			xlsxSIPool.Put(si)
			if decErr != nil {
				return wrap(decErr)
			}
		}
	}

	if reftable == nil {
		// The document ended without an <sst> element; report it to
		// the caller instead of panicking.
		return wrap(fmt.Errorf("no sst element found, reftable uninitialised"))
	}
	return reftable, nil
}

// readSharedStringsFromZipFile() is an internal helper function to
// extract a reference table from the sharedStrings.xml file within
// the XLSX zip file.
func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
var sst *xlsxSST
var err error
var rc io.ReadCloser
var decoder *xml.Decoder
var reftable *RefTable

wrap := func(err error) (*RefTable, error) {
return nil, fmt.Errorf("readSharedStringsFromZipFile: %w", err)
Expand All @@ -870,15 +980,7 @@ func readSharedStringsFromZipFile(f *zip.File) (*RefTable, error) {
return wrap(err)
}
defer rc.Close()

sst = new(xlsxSST)
decoder = xml.NewDecoder(rc)
err = decoder.Decode(sst)
if err != nil {
return wrap(err)
}
reftable = MakeSharedStringRefTable(sst)
return reftable, nil
return readSharedStrings(rc)
}

// readStylesFromZipFile() is an internal helper function to
Expand Down
76 changes: 29 additions & 47 deletions lib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ func TestLib(t *testing.T) {
// })

csRunC(c, "ReadRowsFromSheet", func(c *qt.C, constructor CellStoreConstructor) {
var err error
var sharedstringsXML = bytes.NewBufferString(`
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="4" uniqueCount="4">
Expand Down Expand Up @@ -337,14 +338,12 @@ func TestLib(t *testing.T) {
footer="0.3"/>
</worksheet>`)
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
err = xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheet("test")
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -433,12 +432,10 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
Expand Down Expand Up @@ -486,13 +483,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -568,13 +563,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -717,12 +710,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -764,13 +756,10 @@ func TestLib(t *testing.T) {
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)

sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -882,12 +871,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)

Expand Down Expand Up @@ -964,12 +952,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -1043,12 +1030,11 @@ func TestLib(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
lt := make(hyperlinkTable)
Expand Down Expand Up @@ -1334,13 +1320,10 @@ func TestLib(t *testing.T) {
err := xml.NewDecoder(sheetXML).Decode(worksheet)
c.Assert(err, qt.IsNil)

sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)

file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
Expand Down Expand Up @@ -1433,12 +1416,11 @@ func TestReadRowsFromSheet(t *testing.T) {
worksheet := new(xlsxWorksheet)
err := xml.NewDecoder(sheetxml).Decode(worksheet)
c.Assert(err, qt.IsNil)
sst := new(xlsxSST)
err = xml.NewDecoder(sharedstringsXML).Decode(sst)
c.Assert(err, qt.IsNil)
file := new(File)
file.cellStoreConstructor = constructor
file.referenceTable = MakeSharedStringRefTable(sst)
file.referenceTable, err = readSharedStrings(sharedstringsXML)
c.Assert(err, qt.IsNil)

worksheet.mapMergeCells()
sheet, err := NewSheetWithCellStore("test", constructor)
c.Assert(err, qt.IsNil)
Expand Down
Loading

0 comments on commit 3476977

Please sign in to comment.