From 938ba2f81adad497a1cb84fdc751eb2bb9f247b1 Mon Sep 17 00:00:00 2001 From: Roxy Light Date: Tue, 11 Feb 2025 20:50:53 -0800 Subject: [PATCH] Implement `utf8` library Run pattern-matching and UTF-8 tests from Lua test suite. Updates #74 --- internal/lua/auxlib.go | 2 +- internal/lua/lua_test.go | 11 +- internal/lua/testdata/testsuite/pm.lua | 119 ++++--- internal/lua/testdata/testsuite/utf8.lua | 3 +- internal/lua/utf8lib.go | 349 ++++++++++++++++++ internal/lua/utf8lib_test.go | 430 +++++++++++++++++++++++ 6 files changed, 856 insertions(+), 58 deletions(-) create mode 100644 internal/lua/utf8lib.go create mode 100644 internal/lua/utf8lib_test.go diff --git a/internal/lua/auxlib.go b/internal/lua/auxlib.go index 27d710e..365e57a 100644 --- a/internal/lua/auxlib.go +++ b/internal/lua/auxlib.go @@ -438,9 +438,9 @@ func OpenLibraries(ctx context.Context, l *State) error { {TableLibraryName, OpenTable}, {StringLibraryName, OpenString}, {MathLibraryName, NewOpenMath(nil)}, + {UTF8LibraryName, OpenUTF8}, // {IOLibraryName, NewIOLibrary().OpenLibrary}, // {OSLibraryName, NewOSLibrary().OpenLibrary}, - // {UTF8LibraryName, OpenUTF8}, // {DebugLibraryName, OpenDebug}, // {PackageLibraryName, OpenPackage}, } diff --git a/internal/lua/lua_test.go b/internal/lua/lua_test.go index 9e5fa5c..1b4d946 100644 --- a/internal/lua/lua_test.go +++ b/internal/lua/lua_test.go @@ -1187,7 +1187,9 @@ func TestRotate(t *testing.T) { func TestSuite(t *testing.T) { names := []string{ "math", + "pm", "strings", + "utf8", } for _, name := range names { @@ -1207,6 +1209,13 @@ func TestSuite(t *testing.T) { t.Fatal(err) } + // Message handler. + l.PushClosure(0, func(ctx context.Context, l *State) (int, error) { + msg, _ := l.ToString(1) + l.PushStringContext(Traceback(l, msg, 1), l.StringContext(1)) + return 1, nil + }) + sourcePath := filepath.Join("testdata", "testsuite", name+".lua") sourceData, err := os.ReadFile(sourcePath) if err != nil { @@ -1216,7 +1225,7 @@ func TestSuite(t *testing.T) { if err != nil { t.Fatal(err) } - if err := l.Call(ctx, 0, 0); err != nil { + if err := l.PCall(ctx, 0, 0, -2); err != nil { t.Fatal(err) } }) diff --git a/internal/lua/testdata/testsuite/pm.lua b/internal/lua/testdata/testsuite/pm.lua index e5e3f7a..60df98d 100644 --- a/internal/lua/testdata/testsuite/pm.lua +++ b/internal/lua/testdata/testsuite/pm.lua @@ -115,10 +115,11 @@ local function f1 (s, p) return string.sub(s, t[1], t[#t] - 1) end -assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o") -assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3') -assert(f1('=======', '^(=*)=%1$') == '=======') -assert(not string.match('==========', '^([=]*)=%1$')) +-- XXX: Backreferences not supported. +-- assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o") +-- assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3') +-- assert(f1('=======', '^(=*)=%1$') == '=======') +-- assert(not string.match('==========', '^([=]*)=%1$')) local function range (i, j) if i <= j then @@ -145,8 +146,9 @@ assert(strset('[^%W]') == strset('[%w]')) assert(strset('[]%%]') == '%]') assert(strset('[a%-z]') == '-az') assert(strset('[%^%[%-a%]%-b]') == '-[]^ab') -assert(strset('%Z') == strset('[\1-\255]')) -assert(strset('.') == strset('[\1-\255%z]')) +-- XXX: Undocumented %z not supported. +-- assert(strset('%Z') == strset('[\1-\255]')) +assert(strset('.') == strset('[\0-\255]')) print('+'); assert(string.match("alo xyzK", "(%w+)K") == "xyz") @@ -227,13 +229,14 @@ local r = string.gsub(s, '()(%w+)()', function (a,w,b) assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4) -local function isbalanced (s) - return not string.find(string.gsub(s, "%b()", ""), "[()]") -end +-- XXX: Balances not supported. +-- local function isbalanced (s) +-- return not string.find(string.gsub(s, "%b()", ""), "[()]") +-- end -assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a")) -assert(not isbalanced("(9 ((8) 7) a b (\0 c) a")) -assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo') +-- assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a")) +-- assert(not isbalanced("(9 ((8) 7) a b (\0 c) a")) +-- assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo') local t = {"apple", "orange", "lime"; n=0} @@ -252,8 +255,9 @@ assert(t[1] == "first" and t[2] == "second" and t[3] == undef) checkerror("invalid replacement value %(a table%)", string.gsub, "alo", ".", {a = {}}) checkerror("invalid capture index %%2", string.gsub, "alo", ".", "%2") -checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a") -checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a") +-- XXX: Backreferences not supported. +-- checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a") +-- checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a") checkerror("invalid use of '%%'", string.gsub, "alo", ".", "%x") @@ -301,11 +305,12 @@ for w in string.gmatch("first second word", "%w+") do end assert(t[1] == "first" and t[2] == "second" and t[3] == "word") -t = {3, 6, 9} -for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do - assert(i == table.remove(t, 1)) -end -assert(#t == 0) +-- XXX: Backreferences not supported. +-- t = {3, 6, 9} +-- for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do +-- assert(i == table.remove(t, 1)) +-- end +-- assert(#t == 0) t = {} for i,j in string.gmatch("13 14 10 = 11, 15= 16, 22=23", "(%d+)%s*=%s*(%d+)") do @@ -355,12 +360,14 @@ assert(string.gsub("function", "%f[\1-\255]%w", ".") == ".unction") assert(string.gsub("function", "%f[^\1-\255]", ".") == "function.") assert(string.find("a", "%f[a]") == 1) -assert(string.find("a", "%f[^%z]") == 1) +-- XXX: Undocumented %z not supported. +assert(string.find("a", "%f[^\0]") == 1) assert(string.find("a", "%f[^%l]") == 2) -assert(string.find("aba", "%f[a%z]") == 3) -assert(string.find("aba", "%f[%z]") == 4) -assert(not string.find("aba", "%f[%l%z]")) -assert(not string.find("aba", "%f[^%l%z]")) +-- XXX: Undocumented %z not supported. +assert(string.find("aba", "%f[a\0]") == 3) +assert(string.find("aba", "%f[\0]") == 4) +assert(not string.find("aba", "%f[%l\0]")) +assert(not string.find("aba", "%f[^%l\0]")) local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]") assert(i == 2 and e == 5) @@ -388,8 +395,8 @@ malform("[]") malform("[^]") malform("[a%]") malform("[a%") -malform("%b") -malform("%ba") +malform("%b", "balances not supported") +malform("%ba", "balances not supported") malform("%") malform("%f", "missing") @@ -398,7 +405,8 @@ assert(string.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2") assert(string.match("ab\0\1\2c", "[\0-\0]+") == "\0") assert(string.find("b$a", "$\0?") == 2) assert(string.find("abc\0efg", "%\0") == 4) -assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1") +-- XXX: Balances not supported. +-- assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1") assert(string.match("abc\0\0\0", "%\0+") == "\0\0\0") assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0") @@ -407,34 +415,35 @@ assert(string.find("abc\0\0","\0.") == 4) assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4) -do -- test reuse of original string in gsub - local s = string.rep("a", 100) - local r = string.gsub(s, "b", "c") -- no match - assert(string.format("%p", s) == string.format("%p", r)) - - r = string.gsub(s, ".", {x = "y"}) -- no substitutions - assert(string.format("%p", s) == string.format("%p", r)) - - local count = 0 - r = string.gsub(s, ".", function (x) - assert(x == "a") - count = count + 1 - return nil -- no substitution - end) - r = string.gsub(r, ".", {b = 'x'}) -- "a" is not a key; no subst. - assert(count == 100) - assert(string.format("%p", s) == string.format("%p", r)) - - count = 0 - r = string.gsub(s, ".", function (x) - assert(x == "a") - count = count + 1 - return x -- substitution... - end) - assert(count == 100) - -- no reuse in this case - assert(r == s and string.format("%p", s) ~= string.format("%p", r)) -end +-- XXX: Strings do not have a pointer address in our implementation. +-- do -- test reuse of original string in gsub +-- local s = string.rep("a", 100) +-- local r = string.gsub(s, "b", "c") -- no match +-- assert(string.format("%p", s) == string.format("%p", r)) + +-- r = string.gsub(s, ".", {x = "y"}) -- no substitutions +-- assert(string.format("%p", s) == string.format("%p", r)) + +-- local count = 0 +-- r = string.gsub(s, ".", function (x) +-- assert(x == "a") +-- count = count + 1 +-- return nil -- no substitution +-- end) +-- r = string.gsub(r, ".", {b = 'x'}) -- "a" is not a key; no subst. +-- assert(count == 100) +-- assert(string.format("%p", s) == string.format("%p", r)) + +-- count = 0 +-- r = string.gsub(s, ".", function (x) +-- assert(x == "a") +-- count = count + 1 +-- return x -- substitution... +-- end) +-- assert(count == 100) +-- -- no reuse in this case +-- assert(r == s and string.format("%p", s) ~= string.format("%p", r)) +-- end print('OK') diff --git a/internal/lua/testdata/testsuite/utf8.lua b/internal/lua/testdata/testsuite/utf8.lua index efadbd5..8184755 100644 --- a/internal/lua/testdata/testsuite/utf8.lua +++ b/internal/lua/testdata/testsuite/utf8.lua @@ -5,7 +5,8 @@ print "testing UTF-8 library" -local utf8 = require'utf8' +-- XXX: utf8 library available as a global instead of being required. +local utf8 = utf8 local function checkerror (msg, f, ...) diff --git a/internal/lua/utf8lib.go b/internal/lua/utf8lib.go new file mode 100644 index 0000000..4befd1e --- /dev/null +++ b/internal/lua/utf8lib.go @@ -0,0 +1,349 @@ +// Copyright 2025 The zb Authors +// SPDX-License-Identifier: MIT + +package lua + +import ( + "context" + "errors" + "fmt" + "strings" + "unicode/utf8" +) + +// TableLibraryName is the conventional identifier for the [table manipulation library]. +// +// [table manipulation library]: https://www.lua.org/manual/5.4/manual.html#6.6 +const UTF8LibraryName = "utf8" + +// maxUTF8 is the maximum permitted UTF-8 codepoint. +const maxUTF8 = 0x7fffffff + +// OpenUTF8 is a [Function] that loads the [UTF-8 library]. +// This function is intended to be used as an argument to [Require]. +// +// [UTF-8 library]: https://www.lua.org/manual/5.4/manual.html#6.5 +func OpenUTF8(ctx context.Context, l *State) (int, error) { + NewLib(l, map[string]Function{ + "char": utf8Char, + "charpattern": nil, + "codepoint": utf8Codepoint, + "codes": utf8Codes, + "len": utf8Len, + "offset": utf8Offset, + }) + l.PushString("[\x00-\x7F\xC2-\xFD][\x80-\xBF]*") + l.RawSetField(-2, "charpattern") + return 1, nil +} + +func utf8Char(ctx context.Context, l *State) (int, error) { + sb := new(strings.Builder) + for i := range l.Top() { + codePoint, err := CheckInteger(l, 1+i) + if err != nil { + return 0, err + } + if codePoint < 0 || codePoint > maxUTF8 { + return 0, NewArgError(l, 1+i, "value out of range") + } + writeRune(sb, rune(codePoint)) + } + l.PushString(sb.String()) + return 1, nil +} + +func utf8Codes(ctx context.Context, l *State) (int, error) { + lax := l.ToBoolean(2) + s, err := CheckString(l, 1) + if err != nil { + return 0, err + } + if len(s) > 0 && !utf8.RuneStart(s[0]) { + return 0, NewArgError(l, 1, errInvalidUTF8.Error()) + } + l.PushClosure(0, func(ctx context.Context, l *State) (int, error) { + return utf8CodesNext(ctx, l, lax) + }) + l.PushValue(1) + l.PushInteger(0) + return 3, nil +} + +func utf8CodesNext(ctx context.Context, l *State, lax bool) (int, error) { + s, err := CheckString(l, 1) + if err != nil { + return 0, err + } + n, _ := l.ToInteger(2) + for n >= 0 && n < int64(len(s)) && !utf8.RuneStart(s[n]) { + n++ + } + if n < 0 || n >= int64(len(s)) { + return 0, nil + } + var c rune + var size int + if lax { + c, size = decodeLaxUTF8RuneInString(s[n:]) + } else { + c, size = utf8.DecodeRuneInString(s[n:]) + } + if c == utf8.RuneError && size == 1 || + int(n)+size < len(s) && !utf8.RuneStart(s[int(n)+size]) { + return 0, errInvalidUTF8 + } + l.PushInteger(n + 1) // Reference implementation always returns n + 1. + l.PushInteger(int64(c)) + return 2, nil +} + +func utf8Codepoint(ctx context.Context, l *State) (int, error) { + s, err := CheckString(l, 1) + if err != nil { + return 0, err + } + iArg := int64(1) + if !l.IsNoneOrNil(2) { + var err error + iArg, err = CheckInteger(l, 2) + if err != nil { + return 0, err + } + } + var i int + switch { + case iArg == 0 || iArg < -int64(len(s)): + return 0, NewArgError(l, 2, "out of bounds") + case iArg < 0: + i = len(s) + int(iArg) + default: + i = int(iArg - 1) + } + jArg := int64(iArg) + if !l.IsNoneOrNil(3) { + var err error + jArg, err = CheckInteger(l, 3) + if err != nil { + return 0, err + } + } + var j int + switch { + case jArg > int64(len(s)): + return 0, NewArgError(l, 3, "out of bounds") + case jArg < -int64(len(s)): + j = 0 + case jArg < 0: + j = len(s) + int(jArg) + default: + j = int(jArg - 1) + } + decode := utf8.DecodeRuneInString + if l.ToBoolean(4) { + decode = decodeLaxUTF8RuneInString + } + + if i > j { + return 0, nil + } + if !l.CheckStack(j - i + 1) { + return 0, fmt.Errorf("%sstack overflow (string slice too long)", Where(l, 1)) + } + n := 0 + for i <= j && i < len(s) { + c, size := decode(s[i:]) + if c == utf8.RuneError && size == 1 { + return 0, errInvalidUTF8 + } + l.PushInteger(int64(c)) + n++ + i += size + } + return n, nil +} + +func utf8Len(ctx context.Context, l *State) (int, error) { + s, err := CheckString(l, 1) + if err != nil { + return 0, err + } + iArg := int64(1) + if !l.IsNoneOrNil(2) { + var err error + iArg, err = CheckInteger(l, 2) + if err != nil { + return 0, err + } + } + var i int + switch { + case iArg == 0 || iArg > int64(len(s))+1 || iArg < -int64(len(s)): + return 0, NewArgError(l, 2, "initial position out of bounds") + case iArg < 0: + i = int(int64(len(s)) + iArg) + default: + i = int(iArg) - 1 + } + jArg := int64(-1) + if !l.IsNoneOrNil(3) { + var err error + jArg, err = CheckInteger(l, 3) + if err != nil { + return 0, err + } + } + var j int + switch { + case jArg < 0: + j = int(max(int64(len(s))+jArg, 0)) + case jArg > int64(len(s)): + return 0, NewArgError(l, 3, "final position out of bounds") + default: + j = int(jArg) - 1 + } + decode := utf8.DecodeRuneInString + if l.ToBoolean(4) { + decode = decodeLaxUTF8RuneInString + } + + n := 0 + for i <= j && i < len(s) { + c, size := decode(s[i:]) + if c == utf8.RuneError && size == 1 { + l.PushNil() + l.PushInteger(int64(1 + i)) + return 2, nil + } + i += size + n++ + } + l.PushInteger(int64(n)) + return 1, nil +} + +func utf8Offset(ctx context.Context, l *State) (int, error) { + s, err := CheckString(l, 1) + if err != nil { + return 0, err + } + n, err := CheckInteger(l, 2) + if err != nil { + return 0, err + } + i := int64(1) + if n < 0 { + i = int64(len(s)) + 1 + } + if !l.IsNoneOrNil(3) { + var err error + i, err = CheckInteger(l, 3) + if err != nil { + return 0, err + } + } + i-- + if i < 0 || i > int64(len(s)) { + return 0, NewArgError(l, 3, "position out of bounds") + } + + if n == 0 { + for 0 < i && i < int64(len(s)) && !utf8.RuneStart(s[i]) { + i-- + } + l.PushInteger(i + 1) + return 1, nil + } + + if i < int64(len(s)) && !utf8.RuneStart(s[i]) { + return 0, fmt.Errorf("%sinitial position is a continuation byte", Where(l, 1)) + } + + if n < 0 { + for n < 0 && i > 0 { + for { + i-- + if i <= 0 || utf8.RuneStart(s[i]) { + break + } + } + n++ + } + } else { + n-- // Do not move for first character. + for n > 0 && i < int64(len(s)) { + for { + i++ + if i >= int64(len(s)) || utf8.RuneStart(s[i]) { + break + } + } + n-- + } + } + if n == 0 { + l.PushInteger(i + 1) + } else { + l.PushNil() + } + return 1, nil +} + +func decodeLaxUTF8RuneInString(s string) (r rune, size int) { + if len(s) == 0 { + return utf8.RuneError, 0 + } + c := s[0] + if isASCII(rune(c)) { + return rune(c), 1 + } + // Read continuation bytes. + for size = 1; c&0x40 != 0; c, size = c<<1, size+1 { + if size >= len(s) { + return utf8.RuneError, 1 + } + cc := s[size] + if utf8.RuneStart(cc) { + return utf8.RuneError, 1 + } + r = (r << 6) | rune(cc&0x3f) + } + // Add first byte. + r |= rune(c&0x7f) << ((size - 1) * 5) + + limits := [...]rune{1<<31 - 1, 0x80, 0x800, 0x10000, 0x200000, 0x4000000} + if size-1 >= len(limits) || r > maxUTF8 || r < limits[size-1] { + return utf8.RuneError, 1 + } + return r, size +} + +// writeRune encodes a rune as UTF-8, +// permitting runes up to 1<<31 - 1. +func writeRune(sb *strings.Builder, c rune) { + if isASCII(c) { + sb.WriteByte(byte(c)) + return + } + + var buf [6]byte + firstByteMax := byte(0x3f) + n := 1 + for { + buf[len(buf)-n] = byte(0x80 | (c & 0x3f)) + n++ + c >>= 6 + firstByteMax >>= 1 + if c <= rune(firstByteMax) { + break + } + } + buf[len(buf)-n] = (^firstByteMax << 1) | byte(c) + sb.Write(buf[len(buf)-n:]) +} + +func isASCII(c rune) bool { + return 0 <= c && c < 0x80 +} + +var errInvalidUTF8 = errors.New("invalid UTF-8 code") diff --git a/internal/lua/utf8lib_test.go b/internal/lua/utf8lib_test.go new file mode 100644 index 0000000..9fbb28d --- /dev/null +++ b/internal/lua/utf8lib_test.go @@ -0,0 +1,430 @@ +// Copyright 2025 The zb Authors +// SPDX-License-Identifier: MIT + +package lua + +import ( + "context" + "fmt" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "zb.256lights.llc/pkg/internal/lualex" +) + +func TestUTF8Char(t *testing.T) { + tests := []struct { + args []int64 + want string + }{ + { + args: []int64{}, + want: "", + }, + { + args: []int64{'a'}, + want: "a", + }, + { + args: []int64{'h', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd'}, + want: "hello World", + }, + { + args: []int64{0x6c49}, + want: "\u6c49", + }, + { + args: []int64{0x7fffffff}, + want: "\xfd\xbf\xbf\xbf\xbf\xbf", + }, + } + + ctx := context.Background() + for _, test := range tests { + func() { + state := new(State) + defer func() { + if err := state.Close(); err != nil { + t.Error("Close:", err) + } + }() + + state.PushClosure(0, OpenUTF8) + if err := state.Call(ctx, 0, 1); err != nil { + t.Error(err) + return + } + if _, err := state.Field(ctx, -1, "char"); err != nil { + t.Error(err) + return + } + + testName := "utf8.char(" + for i, arg := range test.args { + state.PushInteger(arg) + if i == 0 { + testName = fmt.Sprintf("%s%d", testName, arg) + } else { + testName = fmt.Sprintf("%s, %d", testName, arg) + } + } + testName += ")" + + if err := state.Call(ctx, len(test.args), 1); err != nil { + t.Errorf("%s: %v", testName, err) + return + } + + if got, want := state.Type(-1), TypeString; got != want { + t.Errorf("type(%s) = %v; want %v", testName, got, want) + } else if got, ok := state.ToString(-1); got != test.want || !ok { + t.Errorf("%s = %q; want %q", testName, got, test.want) + } + }() + } +} + +func TestUTF8Codepoint(t *testing.T) { + tests := []struct { + s string + i int64 + j int64 + lax bool + + want []int64 + wantError string + }{ + { + s: "", + i: 1, + j: 1, + wantError: "out of bounds", + }, + { + s: "\x00", + i: 1, + j: 1, + want: []int64{0}, + }, + { + s: "a", + i: 1, + j: 1, + want: []int64{'a'}, + }, + { + s: "hello World", + i: 1, + j: -1, + want: []int64{'h', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd'}, + }, + { + s: "\u6c49\u5b57/\u6f22\u5b57", + i: 1, + j: -1, + want: []int64{0x6c49, 0x5b57, '/', 0x6f22, 0x5b57}, + }, + { + s: "\u6c49\u5b57/\u6f22\u5b57", + i: 1, + j: 1, + want: []int64{0x6c49}, + }, + { + s: "áéí\x80", + i: -8, + j: 1, + wantError: "out of bounds", + }, + { + s: "\xed\xa0\x80", + i: 1, + j: 1, + lax: true, + want: []int64{0xd800}, + }, + { + s: "\xed\xbf\xbf", + i: 1, + j: 1, + lax: true, + want: []int64{0xdfff}, + }, + { + s: "\xfd\xbf\xbf\xbf\xbf\xbf", + i: 1, + j: 1, + lax: true, + want: []int64{0x7fffffff}, + }, + } + + ctx := context.Background() + for _, test := range tests { + func() { + state := new(State) + defer func() { + if err := state.Close(); err != nil { + t.Error("Close:", err) + } + }() + + state.PushClosure(0, OpenUTF8) + if err := state.Call(ctx, 0, 1); err != nil { + t.Error(err) + return + } + if _, err := state.Field(ctx, -1, "codepoint"); err != nil { + t.Error(err) + return + } + + testName := fmt.Sprintf("utf8.codepoint(%s", lualex.Quote(test.s)) + funcIndex := state.Top() + state.PushString(test.s) + if test.i != 1 || test.i != test.j || test.lax { + state.PushInteger(test.i) + testName = fmt.Sprintf("%s, %d", testName, test.i) + if test.i != test.j || test.lax { + state.PushInteger(test.j) + testName = fmt.Sprintf("%s, %d", testName, test.j) + if test.lax { + state.PushBoolean(true) + testName += ", true" + } + } + } + testName += ")" + + if err := state.Call(ctx, state.Top()-funcIndex, MultipleReturns); err != nil { + if test.wantError == "" { + t.Errorf("%s: %v", testName, err) + } else if got := err.Error(); !strings.Contains(got, test.wantError) { + t.Errorf("%s raised: %s; want message to contain %q", testName, got, test.wantError) + } + return + } + if test.wantError != "" { + t.Errorf("%s did not raise an error (expected %q)", testName, test.wantError) + return + } + + var got []int64 + for i, n := funcIndex, state.Top(); i <= n; i++ { + if got, want := state.Type(i), TypeNumber; got != want || !state.IsInteger(i) { + t.Errorf("type(select(%d, %s)) = %v; want integer", i-funcIndex+1, testName, got) + } + n, _ := state.ToInteger(i) + got = append(got, n) + } + if diff := cmp.Diff(test.want, got, cmpopts.EquateEmpty()); diff != "" { + t.Errorf("%s (-want +got):\n%s", testName, diff) + } + }() + } +} + +func TestUTF8Len(t *testing.T) { + tests := []struct { + s string + i int64 + j int64 + lax bool + + want int64 + fail bool + wantError string + }{ + {s: "", i: 1, j: -1, want: 0}, + {s: "abc", i: 0, j: 2, wantError: "out of bounds"}, + {s: "abc", i: 1, j: 4, wantError: "out of bounds"}, + {s: "hello World", i: 1, j: -1, want: 11}, + {s: "hello World", i: 12, j: -1, want: 0}, + {s: "\u6c49\u5b57/\u6f22\u5b57", i: 1, j: 1, want: 1}, + } + + ctx := context.Background() + for _, test := range tests { + func() { + state := new(State) + defer func() { + if err := state.Close(); err != nil { + t.Error("Close:", err) + } + }() + + state.PushClosure(0, OpenUTF8) + if err := state.Call(ctx, 0, 1); err != nil { + t.Error(err) + return + } + if _, err := state.Field(ctx, -1, "len"); err != nil { + t.Error(err) + return + } + + testName := fmt.Sprintf("utf8.len(%s", lualex.Quote(test.s)) + funcIndex := state.Top() + state.PushString(test.s) + if test.i != 1 || test.j != -1 || test.lax { + state.PushInteger(test.i) + testName = fmt.Sprintf("%s, %d", testName, test.i) + if test.j != -1 || test.lax { + state.PushInteger(test.j) + testName = fmt.Sprintf("%s, %d", testName, test.j) + if test.lax { + state.PushBoolean(true) + testName += ", true" + } + } + } + testName += ")" + + if err := state.Call(ctx, state.Top()-funcIndex, 2); err != nil { + if test.wantError == "" { + t.Errorf("%s: %v", testName, err) + } else if got := err.Error(); !strings.Contains(got, test.wantError) { + t.Errorf("%s raised: %s; want message to contain %q", testName, got, test.wantError) + } + return + } + if test.wantError != "" { + t.Errorf("%s did not raise an error (expected %q)", testName, test.wantError) + return + } + + switch tp := state.Type(-2); tp { + case TypeNumber: + if test.fail { + n, _ := state.ToNumber(-2) + t.Errorf("%s = %g; want nil", testName, n) + } else if got, ok := state.ToInteger(-2); !ok { + n, _ := state.ToNumber(-2) + t.Errorf("%s = %g; want %d", testName, n, test.want) + } else if got != test.want { + t.Errorf("%s = %d; want %d", testName, got, test.want) + } + if got, want := state.Type(-1), TypeNil; got != want { + t.Errorf("type(select(2, %s)) = %v; want %v", testName, got, want) + } + case TypeNil: + if !test.fail { + t.Errorf("%s = nil; want %d", testName, test.want) + } else if got, want := state.Type(-1), TypeNumber; got != want { + t.Errorf("type(select(2, %s)) = %v; want %v", testName, got, want) + } else if got, ok := state.ToInteger(-1); !ok { + n, _ := state.ToNumber(-1) + t.Errorf("%s = nil, %g; want nil, %d", testName, n, test.want) + } else if got != test.want { + t.Errorf("%s = nil, %d; want nil, %d", testName, got, test.want) + } + default: + want := TypeNumber.String() + if test.fail { + want = TypeNil.String() + } + t.Errorf("type(%s) = %v; want %s", testName, tp, want) + } + }() + } +} + +func TestUTF8Offset(t *testing.T) { + tests := []struct { + s string + n int64 + i int64 + + want int64 + fail bool + wantError string + }{ + {s: "", n: 1, i: 1, want: 1}, + {s: "alo", n: 5, i: 1, fail: true}, + {s: "alo", n: -4, i: 4, fail: true}, + {s: "abc", n: 1, i: 5, wantError: "position out of bounds"}, + {s: "abc", n: 1, i: -4, wantError: "position out of bounds"}, + {s: "", n: 1, i: 2, wantError: "position out of bounds"}, + {s: "", n: 1, i: -1, wantError: "position out of bounds"}, + {s: "𦧺", n: 1, i: 2, wantError: "continuation byte"}, + {s: "𦧺", n: 1, i: 2, wantError: "continuation byte"}, + {s: "\x80", n: 1, i: 1, wantError: "continuation byte"}, + {s: "hello World", n: 0, i: 1, want: 1}, + {s: "hello World", n: 11, i: 1, want: 11}, + {s: "hello World", n: 2, i: 11, want: 12}, + {s: "\u6c49\u5b57/\u6f22\u5b57", n: 1, i: 1, want: 1}, + {s: "\u6c49\u5b57/\u6f22\u5b57", n: 2, i: 1, want: 4}, + {s: "\u6c49\u5b57/\u6f22\u5b57", n: 2, i: 4, want: 7}, + {s: "\u6c49\u5b57/\u6f22\u5b57", n: 2, i: 7, want: 8}, + } + + ctx := context.Background() + for _, test := range tests { + func() { + state := new(State) + defer func() { + if err := state.Close(); err != nil { + t.Error("Close:", err) + } + }() + + state.PushClosure(0, OpenUTF8) + if err := state.Call(ctx, 0, 1); err != nil { + t.Error(err) + return + } + if _, err := state.Field(ctx, -1, "offset"); err != nil { + t.Error(err) + return + } + + testName := fmt.Sprintf("utf8.offset(%s, %d", lualex.Quote(test.s), test.n) + funcIndex := state.Top() + state.PushString(test.s) + state.PushInteger(test.n) + if !(test.n >= 0 && test.i == 1) && !(test.n < 0 && test.i == int64(len(test.s))+1) { + state.PushInteger(test.i) + testName = fmt.Sprintf("%s, %d", testName, test.i) + } + testName += ")" + + if err := state.Call(ctx, state.Top()-funcIndex, 1); err != nil { + if test.wantError == "" { + t.Errorf("%s: %v", testName, err) + } else if got := err.Error(); !strings.Contains(got, test.wantError) { + t.Errorf("%s raised: %s; want message to contain %q", testName, got, test.wantError) + } + return + } + if test.wantError != "" { + t.Errorf("%s did not raise an error (expected %q)", testName, test.wantError) + return + } + + switch tp := state.Type(-1); tp { + case TypeNumber: + if test.fail { + n, _ := state.ToNumber(-1) + t.Errorf("%s = %g; want nil", testName, n) + } else if got, ok := state.ToInteger(-1); !ok { + n, _ := state.ToNumber(-1) + t.Errorf("%s = %g; want %d", testName, n, test.want) + } else if got != test.want { + t.Errorf("%s = %d; want %d", testName, got, test.want) + } + case TypeNil: + if !test.fail { + t.Errorf("%s = nil; want %d", testName, test.want) + } + default: + want := TypeNumber.String() + if test.fail { + want = TypeNil.String() + } + t.Errorf("type(%s) = %v; want %s", testName, tp, want) + } + }() + } +}