diff --git a/interp/functions.go b/interp/functions.go index 8e12547..25e584d 100644 --- a/interp/functions.go +++ b/interp/functions.go @@ -415,15 +415,16 @@ func (p *interp) sprintf(format string, args []value) (string, error) { n, isStr := a.isTrueStr() if isStr { s := p.toString(a) - if len(s) > 0 { - c = []byte{s[0]} + _, size := utf8.DecodeRuneInString(s) + if size > 0 { + c = []byte(s[:size]) } else { c = []byte{0} } } else { - // Follow the behaviour of awk and mawk, where %c - // operates on bytes (0-255), not Unicode codepoints - c = []byte{byte(n)} + c = make([]byte, utf8.UTFMax) + size := utf8.EncodeRune(c, rune(n)) + c = c[:size] } v = c } diff --git a/interp/interp_test.go b/interp/interp_test.go index 124ca30..2083b90 100644 --- a/interp/interp_test.go +++ b/interp/interp_test.go @@ -85,14 +85,14 @@ NR==3, NR==5 { print NR } {`BEGIN { printf "%.1g", 42 } # !windows-gawk`, "", "4e+01", "", ""}, // for some reason gawk gives "4e+001" on Windows {`BEGIN { printf "%d", 12, 34 }`, "", "12", "", ""}, {`BEGIN { printf "%d" }`, "", "", "format error: got 0 args, expected 1", "not enough arg"}, - // Our %c handling is mostly like awk's, except for multiples - // 256, where awk is weird, and we're like mawk {`BEGIN { printf "%c", 0 }`, "", "\x00", "", ""}, {`BEGIN { printf "%c", 127 }`, "", "\x7f", "", ""}, - {`BEGIN { printf "%c", 128 } # !gawk`, "", "\x80", "", ""}, - {`BEGIN { printf "%c", 255 } # !gawk`, "", "\xff", "", ""}, - {`BEGIN { printf "%c", 256 } # !awk !gawk`, "", "\x00", "", ""}, + {`BEGIN { printf "%c", 128 } # !windows-gawk`, "", "\u0080", "", ""}, + {`BEGIN { printf "%c", 255 } # !windows-gawk`, "", "ÿ", "", ""}, + {`BEGIN { printf "%c", 256 } # !windows-gawk`, "", "Ā", "", ""}, + {`BEGIN { printf "%c", 4660 } # !windows-gawk`, "", "\u1234", "", ""}, {`BEGIN { printf "%c", "xyz" }`, "", "x", "", ""}, + {`BEGIN { printf "%c %c %c", "Ā", "ĀĀĀ", "Āx" } # !windows-gawk`, "", "Ā Ā Ā", "", ""}, {`BEGIN { printf "%c", "" } # !awk`, "", "\x00", "", ""}, {`BEGIN { printf } # !awk !posix - doesn't error on this`, "", "", "parse error at 1:16: expected printf args, got none", "printf: no arguments"}, {`BEGIN { printf("%%%dd", 4) }`, "", "%4d", "", ""},