From 46ecb171fc944561eefa830ff38aec758a9d5e6d Mon Sep 17 00:00:00 2001 From: Prashansa Kulshrestha Date: Fri, 6 Sep 2024 11:22:06 +0530 Subject: [PATCH] fix: simple key handling upto 1024 chars. Currently, if a key is >128 characters, it is handled as a complex key mapping. This renders some OpenAPI spec paths invalid. Until the same PR https://github.com/go-yaml/yaml/pull/1037 is not accepted in the upstream repository, we are adding the changes in this fork and will be using this in deck. Fixes: https://github.com/Kong/go-apiops/issues/198 --- emitterc.go | 56 ++++++++++++++++++++++++++++++++++++++++---------- encode_test.go | 10 +++++++++ go.mod | 8 ++++---- go.sum | 2 ++ 4 files changed, 61 insertions(+), 15 deletions(-) create mode 100644 go.sum diff --git a/emitterc.go b/emitterc.go index 0f47c9ca..48b9ba40 100644 --- a/emitterc.go +++ b/emitterc.go @@ -25,6 +25,7 @@ package yaml import ( "bytes" "fmt" + "unicode/utf8" ) // Flush the buffer if needed. @@ -162,10 +163,9 @@ func yaml_emitter_emit(emitter *yaml_emitter_t, event *yaml_event_t) bool { // Check if we need to accumulate more events before emitting. // // We accumulate extra -// - 1 event for DOCUMENT-START -// - 2 events for SEQUENCE-START -// - 3 events for MAPPING-START -// +// - 1 event for DOCUMENT-START +// - 2 events for SEQUENCE-START +// - 3 events for MAPPING-START func yaml_emitter_need_more_events(emitter *yaml_emitter_t) bool { if emitter.events_head == len(emitter.events) { return true @@ -241,7 +241,7 @@ func yaml_emitter_increase_indent(emitter *yaml_emitter_t, flow, indentless bool emitter.indent += 2 } else { // Everything else aligns to the chosen indentation. - emitter.indent = emitter.best_indent*((emitter.indent+emitter.best_indent)/emitter.best_indent) + emitter.indent = emitter.best_indent * ((emitter.indent + emitter.best_indent) / emitter.best_indent) } } return true @@ -968,15 +968,18 @@ func yaml_emitter_check_empty_mapping(emitter *yaml_emitter_t) bool { // Check if the next node can be expressed as a simple key. func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool { - length := 0 + // first check length in bytes, since the fast majority of strings are + // within 1024 bytes, so take the faster route first. Only if we must, + // we check the length in runes and take the expensive route. + bLength := 0 switch emitter.events[emitter.events_head].typ { case yaml_ALIAS_EVENT: - length += len(emitter.anchor_data.anchor) + bLength += len(emitter.anchor_data.anchor) case yaml_SCALAR_EVENT: if emitter.scalar_data.multiline { return false } - length += len(emitter.anchor_data.anchor) + + bLength += len(emitter.anchor_data.anchor) + len(emitter.tag_data.handle) + len(emitter.tag_data.suffix) + len(emitter.scalar_data.value) @@ -984,20 +987,51 @@ func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool { if !yaml_emitter_check_empty_sequence(emitter) { return false } - length += len(emitter.anchor_data.anchor) + + bLength += len(emitter.anchor_data.anchor) + len(emitter.tag_data.handle) + len(emitter.tag_data.suffix) case yaml_MAPPING_START_EVENT: if !yaml_emitter_check_empty_mapping(emitter) { return false } - length += len(emitter.anchor_data.anchor) + + bLength += len(emitter.anchor_data.anchor) + len(emitter.tag_data.handle) + len(emitter.tag_data.suffix) default: return false } - return length <= 128 + + // length represents bytes, not runes. + if bLength <= 1024 { + // 1024 or less bytes are also 1024 or less runes. + // fast majority of cases are handled here. + return true + } + if bLength > 4096 { + // 4096 or more bytes are always 1024 or more runes. + return false + } + + // we must convert and count runes, which is more expensive but less common. + rLength := 0 + switch emitter.events[emitter.events_head].typ { + case yaml_ALIAS_EVENT: + rLength += utf8.RuneCount(emitter.anchor_data.anchor) + case yaml_SCALAR_EVENT: + rLength += utf8.RuneCount(emitter.anchor_data.anchor) + + utf8.RuneCount(emitter.tag_data.handle) + + utf8.RuneCount(emitter.tag_data.suffix) + + utf8.RuneCount(emitter.scalar_data.value) + case yaml_SEQUENCE_START_EVENT: + rLength += utf8.RuneCount(emitter.anchor_data.anchor) + + utf8.RuneCount(emitter.tag_data.handle) + + utf8.RuneCount(emitter.tag_data.suffix) + case yaml_MAPPING_START_EVENT: + rLength += utf8.RuneCount(emitter.anchor_data.anchor) + + utf8.RuneCount(emitter.tag_data.handle) + + utf8.RuneCount(emitter.tag_data.suffix) + } + return rLength <= 1024 } // Determine an acceptable scalar style. diff --git a/encode_test.go b/encode_test.go index 4a8bf2e2..cd0cc305 100644 --- a/encode_test.go +++ b/encode_test.go @@ -349,6 +349,16 @@ var marshalTests = []struct { "a: \"1:1\"\n", }, + // Issue 849: simple key is <= 1024 runes, not <= 1024 bytes + { + map[string]string{strings.Repeat("a", 1024): "a"}, + strings.Repeat("a", 1024) + ": a" + "\n", + }, + { + map[string]string{strings.Repeat("你", 1024): "a"}, + strings.Repeat("你", 1024) + ": a" + "\n", + }, + // Binary data. { map[string]string{"a": "\x00"}, diff --git a/go.mod b/go.mod index f407ea32..67aca269 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ -module "gopkg.in/yaml.v3" +module gopkg.in/yaml.v3 -require ( - "gopkg.in/check.v1" v0.0.0-20161208181325-20d25e280405 -) +go 1.23.0 + +require gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..d277f45c --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=