Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: simple key handling up to 1024 chars. #1048

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions emitterc.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package yaml
import (
"bytes"
"fmt"
"unicode/utf8"
)

// Flush the buffer if needed.
Expand Down Expand Up @@ -162,10 +163,9 @@ func yaml_emitter_emit(emitter *yaml_emitter_t, event *yaml_event_t) bool {
// Check if we need to accumulate more events before emitting.
//
// We accumulate extra
// - 1 event for DOCUMENT-START
// - 2 events for SEQUENCE-START
// - 3 events for MAPPING-START
//
// - 1 event for DOCUMENT-START
// - 2 events for SEQUENCE-START
// - 3 events for MAPPING-START
func yaml_emitter_need_more_events(emitter *yaml_emitter_t) bool {
if emitter.events_head == len(emitter.events) {
return true
Expand Down Expand Up @@ -241,7 +241,7 @@ func yaml_emitter_increase_indent(emitter *yaml_emitter_t, flow, indentless bool
emitter.indent += 2
} else {
// Everything else aligns to the chosen indentation.
emitter.indent = emitter.best_indent*((emitter.indent+emitter.best_indent)/emitter.best_indent)
emitter.indent = emitter.best_indent * ((emitter.indent + emitter.best_indent) / emitter.best_indent)
}
}
return true
Expand Down Expand Up @@ -968,36 +968,70 @@ func yaml_emitter_check_empty_mapping(emitter *yaml_emitter_t) bool {

// Check if the next node can be expressed as a simple key.
func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool {
length := 0
// First check the length in bytes: the vast majority of keys are well
// within 1024 bytes, so take the faster route first. Only if we must
// do we count the length in runes, which is the more expensive route.
bLength := 0
switch emitter.events[emitter.events_head].typ {
case yaml_ALIAS_EVENT:
length += len(emitter.anchor_data.anchor)
bLength += len(emitter.anchor_data.anchor)
case yaml_SCALAR_EVENT:
if emitter.scalar_data.multiline {
return false
}
length += len(emitter.anchor_data.anchor) +
bLength += len(emitter.anchor_data.anchor) +
len(emitter.tag_data.handle) +
len(emitter.tag_data.suffix) +
len(emitter.scalar_data.value)
case yaml_SEQUENCE_START_EVENT:
if !yaml_emitter_check_empty_sequence(emitter) {
return false
}
length += len(emitter.anchor_data.anchor) +
bLength += len(emitter.anchor_data.anchor) +
len(emitter.tag_data.handle) +
len(emitter.tag_data.suffix)
case yaml_MAPPING_START_EVENT:
if !yaml_emitter_check_empty_mapping(emitter) {
return false
}
length += len(emitter.anchor_data.anchor) +
bLength += len(emitter.anchor_data.anchor) +
len(emitter.tag_data.handle) +
len(emitter.tag_data.suffix)
default:
return false
}
return length <= 128

// bLength counts bytes, not runes.
if bLength <= 1024 {
// 1024 or fewer bytes are also 1024 or fewer runes.
// The vast majority of cases are handled here.
return true
}
if bLength > 4096 {
// More than 4096 bytes are always more than 1024 runes, because a
// UTF-8 encoded rune occupies at most 4 bytes.
return false
}

// Between 1025 and 4096 bytes we must count runes, which is more expensive but less common.
rLength := 0
switch emitter.events[emitter.events_head].typ {
case yaml_ALIAS_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor)
case yaml_SCALAR_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor) +
utf8.RuneCount(emitter.tag_data.handle) +
utf8.RuneCount(emitter.tag_data.suffix) +
utf8.RuneCount(emitter.scalar_data.value)
case yaml_SEQUENCE_START_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor) +
utf8.RuneCount(emitter.tag_data.handle) +
utf8.RuneCount(emitter.tag_data.suffix)
case yaml_MAPPING_START_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor) +
utf8.RuneCount(emitter.tag_data.handle) +
utf8.RuneCount(emitter.tag_data.suffix)
}
return rLength <= 1024
}

// Determine an acceptable scalar style.
Expand Down
10 changes: 10 additions & 0 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,16 @@ var marshalTests = []struct {
"a: \"1:1\"\n",
},

// Issue 849: simple key is <= 1024 runes, not <= 1024 bytes
{
map[string]string{strings.Repeat("a", 1024): "a"},
strings.Repeat("a", 1024) + ": a" + "\n",
},
{
map[string]string{strings.Repeat("你", 1024): "a"},
strings.Repeat("你", 1024) + ": a" + "\n",
},

// Binary data.
{
map[string]string{"a": "\x00"},
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module "gopkg.in/yaml.v3"
module gopkg.in/yaml.v3

require (
"gopkg.in/check.v1" v0.0.0-20161208181325-20d25e280405
)
go 1.23.0

require gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=