Skip to content

Commit

Permalink
fix: simple key handling upto 1024 chars.
Browse files Browse the repository at this point in the history
Currently, if a key is >128 characters, it is
handled as a complex key mapping. This renders some
OpenAPI spec paths invalid.
Until the same PR go-yaml#1037
is not accepted in the upstream repository,
we are adding the changes in this fork and will be
using this in deck.

Fixes: Kong/go-apiops#198
  • Loading branch information
Prashansa-K committed Sep 6, 2024
1 parent f6f7691 commit 46ecb17
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 15 deletions.
56 changes: 45 additions & 11 deletions emitterc.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ package yaml
import (
"bytes"
"fmt"
"unicode/utf8"
)

// Flush the buffer if needed.
Expand Down Expand Up @@ -162,10 +163,9 @@ func yaml_emitter_emit(emitter *yaml_emitter_t, event *yaml_event_t) bool {
// Check if we need to accumulate more events before emitting.
//
// We accumulate extra
// - 1 event for DOCUMENT-START
// - 2 events for SEQUENCE-START
// - 3 events for MAPPING-START
//
// - 1 event for DOCUMENT-START
// - 2 events for SEQUENCE-START
// - 3 events for MAPPING-START
func yaml_emitter_need_more_events(emitter *yaml_emitter_t) bool {
if emitter.events_head == len(emitter.events) {
return true
Expand Down Expand Up @@ -241,7 +241,7 @@ func yaml_emitter_increase_indent(emitter *yaml_emitter_t, flow, indentless bool
emitter.indent += 2
} else {
// Everything else aligns to the chosen indentation.
emitter.indent = emitter.best_indent*((emitter.indent+emitter.best_indent)/emitter.best_indent)
emitter.indent = emitter.best_indent * ((emitter.indent + emitter.best_indent) / emitter.best_indent)
}
}
return true
Expand Down Expand Up @@ -968,36 +968,70 @@ func yaml_emitter_check_empty_mapping(emitter *yaml_emitter_t) bool {

// Check if the next node can be expressed as a simple key.
func yaml_emitter_check_simple_key(emitter *yaml_emitter_t) bool {
length := 0
// first check length in bytes, since the fast majority of strings are
// within 1024 bytes, so take the faster route first. Only if we must,
// we check the length in runes and take the expensive route.
bLength := 0
switch emitter.events[emitter.events_head].typ {
case yaml_ALIAS_EVENT:
length += len(emitter.anchor_data.anchor)
bLength += len(emitter.anchor_data.anchor)
case yaml_SCALAR_EVENT:
if emitter.scalar_data.multiline {
return false
}
length += len(emitter.anchor_data.anchor) +
bLength += len(emitter.anchor_data.anchor) +
len(emitter.tag_data.handle) +
len(emitter.tag_data.suffix) +
len(emitter.scalar_data.value)
case yaml_SEQUENCE_START_EVENT:
if !yaml_emitter_check_empty_sequence(emitter) {
return false
}
length += len(emitter.anchor_data.anchor) +
bLength += len(emitter.anchor_data.anchor) +
len(emitter.tag_data.handle) +
len(emitter.tag_data.suffix)
case yaml_MAPPING_START_EVENT:
if !yaml_emitter_check_empty_mapping(emitter) {
return false
}
length += len(emitter.anchor_data.anchor) +
bLength += len(emitter.anchor_data.anchor) +
len(emitter.tag_data.handle) +
len(emitter.tag_data.suffix)
default:
return false
}
return length <= 128

// length represents bytes, not runes.
if bLength <= 1024 {
// 1024 or less bytes are also 1024 or less runes.
// fast majority of cases are handled here.
return true
}
if bLength > 4096 {
// 4096 or more bytes are always 1024 or more runes.
return false
}

// we must convert and count runes, which is more expensive but less common.
rLength := 0
switch emitter.events[emitter.events_head].typ {
case yaml_ALIAS_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor)
case yaml_SCALAR_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor) +
utf8.RuneCount(emitter.tag_data.handle) +
utf8.RuneCount(emitter.tag_data.suffix) +
utf8.RuneCount(emitter.scalar_data.value)
case yaml_SEQUENCE_START_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor) +
utf8.RuneCount(emitter.tag_data.handle) +
utf8.RuneCount(emitter.tag_data.suffix)
case yaml_MAPPING_START_EVENT:
rLength += utf8.RuneCount(emitter.anchor_data.anchor) +
utf8.RuneCount(emitter.tag_data.handle) +
utf8.RuneCount(emitter.tag_data.suffix)
}
return rLength <= 1024
}

// Determine an acceptable scalar style.
Expand Down
10 changes: 10 additions & 0 deletions encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,16 @@ var marshalTests = []struct {
"a: \"1:1\"\n",
},

// Issue 849: simple key is <= 1024 runes, not <= 1024 bytes
{
map[string]string{strings.Repeat("a", 1024): "a"},
strings.Repeat("a", 1024) + ": a" + "\n",
},
{
map[string]string{strings.Repeat("你", 1024): "a"},
strings.Repeat("你", 1024) + ": a" + "\n",
},

// Binary data.
{
map[string]string{"a": "\x00"},
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module "gopkg.in/yaml.v3"
module gopkg.in/yaml.v3

require (
"gopkg.in/check.v1" v0.0.0-20161208181325-20d25e280405
)
go 1.23.0

require gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

0 comments on commit 46ecb17

Please sign in to comment.