-
Notifications
You must be signed in to change notification settings - Fork 0
/
json.lua
524 lines (466 loc) · 17.4 KB
/
json.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
-----------------------------------------------------------------------------
-- JSON4Lua: JSON encoding / decoding support for the Lua language.
-- json Module.
-- Author: Craig Mason-Jones
-- Homepage: http://json.luaforge.net/
-- Version: 0.9.50
-- This module is released under the MIT License (MIT).
-- Please see LICENCE.txt for details.
--
-- USAGE:
-- This module exposes two functions:
-- encode(o)
-- Returns the table / string / boolean / number / nil / json.null value as a JSON-encoded string.
-- decode(json_string)
-- Returns a Lua object populated with the data encoded in the JSON string json_string.
--
-- REQUIREMENTS:
-- compat-5.1 if using Lua 5.0
--
-- CHANGELOG
-- 0.9.50 Radical performance improvement on decode from Eike Decker. Many thanks!
-- 0.9.40 Changed licence to MIT License (MIT)
-- 0.9.20 Introduction of local Lua functions for private functions (removed _ function prefix).
-- Fixed Lua 5.1 compatibility issues.
-- Introduced json.null to have null values in associative arrays.
-- encode() performance improvement (more than 50%) through table.concat rather than ..
-- Introduced decode ability to ignore /**/ comments in the JSON string.
-- 0.9.10 Fix to array encoding / decoding to correctly manage nil/null values in arrays.
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- Imports and dependencies
-----------------------------------------------------------------------------
local math = require('math')
local string = require("string")
local table = require("table")
local tostring = tostring
local json = {}
-- Public functions
-- Private functions
local decode_scanArray
local decode_scanComment
local decode_scanConstant
local decode_scanNumber
local decode_scanObject
local decode_scanString
local decode_scanWhitespace
local encodeString
local isArray
local isEncodable
-----------------------------------------------------------------------------
-- PUBLIC FUNCTIONS
-----------------------------------------------------------------------------
--- Encodes an arbitrary Lua object / variable.
-- @param v The Lua object / variable to be JSON encoded.
-- @return String containing the JSON encoding in internal Lua string format (i.e. not unicode)
local function encode(v)
-- Handle nil values
if v == nil then
return "null"
end
local vtype = type(v)
-- Handle strings
if vtype == 'string' then
return '"' .. encodeString(v) .. '"' -- Need to handle encoding in string
end
-- Handle booleans
if vtype == 'number' or vtype == 'boolean' then
return tostring(v)
end
-- Handle tables
if vtype == 'table' then
local rval = {}
-- Consider arrays separately
local bArray, maxCount = isArray(v)
if bArray then
for i = 1, maxCount do
table.insert(rval, encode(v[i]))
end
else -- An object, not an array
for i, j in pairs(v) do
if isEncodable(i) and isEncodable(j) then
table.insert(rval, '"' .. encodeString(i) .. '":' .. encode(j))
end
end
end
if bArray then
return '[' .. table.concat(rval, ',') .. ']'
else
return '{' .. table.concat(rval, ',') .. '}'
end
end
-- Handle null values
if vtype == 'function' and v == null then
return 'null'
end
assert(false, 'encode attempt to encode unsupported type ' .. vtype .. ':' .. tostring(v))
end
--- The null function allows one to specify a null value in an associative array (which is otherwise
-- discarded if you set the value with 'nil' in Lua. Simply set t = { first=json.null }
local function null()
return null -- so json.null() will also return null ;-)
end
--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
-- @param s The string to scan.
-- @return Lua objectthat was scanned, as a Lua table / string / number / boolean or nil.
local function decode(s)
-- Function is re-defined below after token and other items are created.
-- Just defined here for code neatness.
return null
end
-----------------------------------------------------------------------------
-- Internal, PRIVATE functions.
-----------------------------------------------------------------------------
--- Encodes a string to be JSON-compatible.
-- This just involves back-quoting inverted commas, back-quotes and newlines, I think ;-)
-- @param s The string to return as a JSON encoded (i.e. backquoted string)
-- @return The string appropriately escaped.
local qrep = { ["\\"] = "\\\\", ['"'] = '\\"', ['\n'] = '\\n', ['\t'] = '\\t' }
local function encodeString(s)
return tostring(s):gsub('["\\\n\t]', qrep)
end
-- Determines whether the given Lua type is an array or a table / dictionary.
-- We consider any table an array if it has indexes 1..n for its n items, and no
-- other data in the table.
-- I think this method is currently a little 'flaky', but can't think of a good way around it yet...
-- @param t The table to evaluate as an array
-- @return boolean, number True if the table can be represented as an array, false otherwise. If true,
-- the second returned value is the maximum
-- number of indexed elements in the array.
local function isArray(t)
-- Next we count all the elements, ensuring that any non-indexed elements are not-encodable
-- (with the possible exception of 'n')
local maxIndex = 0
for k, v in pairs(t) do
if (type(k) == 'number' and math.floor(k) == k and 1 <= k) then -- k,v is an indexed pair
if (not isEncodable(v)) then return false end -- All array elements must be encodable
maxIndex = math.max(maxIndex, k)
else
if (k == 'n') then
if v ~= table.getn(t) then return false end -- False if n does not hold the number of elements
else -- Else of (k=='n')
if isEncodable(v) then return false end
end -- End of (k~='n')
end -- End of k,v not an indexed pair
end -- End of loop across all pairs
return true, maxIndex
end
--- Determines whether the given Lua object / table / variable can be JSON encoded. The only
-- types that are JSON encodable are: string, boolean, number, nil, table and json.null.
-- In this implementation, all other types are ignored.
-- @param o The object to examine.
-- @return boolean True if the object should be JSON encoded, false if it should be ignored.
local function isEncodable(o)
local t = type(o)
return (t == 'string' or t == 'boolean' or t == 'number' or t == 'nil' or t == 'table') or (t == 'function' and o ==
null)
end
local decode
-- Radical performance improvement for decode from Eike Decker!
do
local type = type
local error = error
local assert = assert
local print = print
local tonumber = tonumber
-- initialize some values to be used in decoding function
-- initializes a table to contain a byte=>table mapping
-- the table contains tokens (byte values) as keys and maps them on other
-- token tables (mostly, the boolean value 'true' is used to indicate termination
-- of a token sequence)
-- the token table's purpose is, that it allows scanning a sequence of bytes
-- until something interesting has been found (e.g. a token that is not expected)
-- name is a descriptor for the table to be printed in error messages
local function init_token_table(tt)
local struct = {}
local value
function struct:link(other_tt)
value = other_tt
return struct
end
function struct:to(chars)
for i = 1, #chars do
tt[chars:byte(i)] = value
end
return struct
end
return function(name)
tt.name = name
return struct
end
end
-- keep "named" byte values at hands
local
c_esc,
c_e,
c_l,
c_r,
c_u,
c_f,
c_a,
c_s,
c_slash = ("\\elrufas/"):byte(1, 9)
-- token tables - tt_doublequote_string = strDoubleQuot, tt_singlequote_string = strSingleQuot
local
tt_object_key,
tt_object_colon,
tt_object_value,
tt_doublequote_string,
tt_singlequote_string,
tt_array_value,
tt_array_seperator,
tt_numeric,
tt_boolean,
tt_null,
tt_comment_start,
tt_comment_middle,
tt_ignore--< tt_ignore is special - marked tokens will be tt_ignored
= {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
-- strings to be used in certain token tables
local strchars = "" -- all valid string characters (all except newlines)
local allchars = "" -- all characters that are valid in comments
--local escapechar = {}
for i = 0, 0xff do
local c = string.char(i)
if c ~= "\n" and c ~= "\r" then strchars = strchars .. c end
allchars = allchars .. c
--escapechar[i] = "\\" .. string.char(i)
end
--[[
charstounescape = "\"\'\\bfnrt/";
unescapechars = "\"'\\\b\f\n\r\t\/";
for i=1,#charstounescape do
escapechar[ charstounescape:byte(i) ] = unescapechars:sub(i,i)
end
]] --
-- obj key reader, expects the end of the object or a quoted string as key
init_token_table(tt_object_key) "object (' or \" or } or , expected)"
:link(tt_singlequote_string):to "'"
:link(tt_doublequote_string):to '"'
:link(true):to "}"
:link(tt_object_key):to ","
:link(tt_comment_start):to "/"
:link(tt_ignore):to " \t\r\n"
-- after the key, a colon is expected (or comment)
init_token_table(tt_object_colon) "object (: expected)"
:link(tt_object_value):to ":"
:link(tt_comment_start):to "/"
:link(tt_ignore):to " \t\r\n"
-- as values, anything is possible, numbers, arrays, objects, boolean, null, strings
init_token_table(tt_object_value) "object ({ or [ or ' or \" or number or boolean or null expected)"
:link(tt_object_key):to "{"
:link(tt_array_seperator):to "["
:link(tt_singlequote_string):to "'"
:link(tt_doublequote_string):to '"'
:link(tt_numeric):to "0123456789.-"
:link(tt_boolean):to "tf"
:link(tt_null):to "n"
:link(tt_comment_start):to "/"
:link(tt_ignore):to " \t\r\n"
-- token tables for reading strings
init_token_table(tt_doublequote_string) "double quoted string"
:link(tt_ignore):to(strchars)
:link(c_esc):to "\\"
:link(true):to '"'
init_token_table(tt_singlequote_string) "single quoted string"
:link(tt_ignore):to(strchars)
:link(c_esc):to "\\"
:link(true):to "'"
-- array reader that expects termination of the array or a comma that indicates the next value
init_token_table(tt_array_value) "array (, or ] expected)"
:link(tt_array_seperator):to ","
:link(true):to "]"
:link(tt_comment_start):to "/"
:link(tt_ignore):to " \t\r\n"
-- a value, pretty similar to tt_object_value
init_token_table(tt_array_seperator) "array ({ or [ or ' or \" or number or boolean or null expected)"
:link(tt_object_key):to "{"
:link(tt_array_seperator):to "["
:link(tt_singlequote_string):to "'"
:link(tt_doublequote_string):to '"'
:link(tt_comment_start):to "/"
:link(tt_numeric):to "0123456789.-"
:link(tt_boolean):to "tf"
:link(tt_null):to "n"
:link(tt_ignore):to " \t\r\n"
-- valid number tokens
init_token_table(tt_numeric) "number"
:link(tt_ignore):to "0123456789.-Ee"
-- once a comment has been started with /, a * is expected
init_token_table(tt_comment_start) "comment start (* expected)"
:link(tt_comment_middle):to "*"
-- now everything is allowed, watch out for * though. The next char is then checked manually
init_token_table(tt_comment_middle) "comment end"
:link(tt_ignore):to(allchars)
:link(true):to "*"
function decode(js_string)
local pos = 1 -- position in the string
-- read the next byte value
local function next_byte() pos = pos + 1 return js_string:byte(pos - 1) end
-- in case of error, report the location using line numbers
local function location()
local n = ("\n"):byte()
local line, lpos = 1, 0
for i = 1, pos do
if js_string:byte(i) == n then
line, lpos = line + 1, 1
else
lpos = lpos + 1
end
end
return "Line " .. line .. " character " .. lpos
end
-- debug func
--local function status (str)
-- print(str.." ("..s:sub(math.max(1,p-10),p+10)..")")
--end
-- read the next token, according to the passed token table
local function next_token(tok)
while pos <= #js_string do
local b = js_string:byte(pos)
local t = tok[b]
if not t then
error("Unexpected character at " .. location() .. ": " ..
string.char(b) .. " (" .. b .. ") when reading " .. tok.name .. "\nContext: \n" ..
js_string:sub(math.max(1, pos - 30), pos + 30) .. "\n" .. (" "):rep(pos + math.min(-1, 30 - pos)) .. "^")
end
pos = pos + 1
if t ~= tt_ignore then return t end
end
error("unexpected termination of JSON while looking for " .. tok.name)
end
-- read a string, double and single quoted ones
local function read_string(tok)
local start = pos
--local returnString = {}
repeat
local t = next_token(tok)
if t == c_esc then
--table.insert(returnString, js_string:sub(start, pos-2))
--table.insert(returnString, escapechar[ js_string:byte(pos) ])
pos = pos + 1
--start = pos
end -- jump over escaped chars, no matter what
until t == true
return (loadstring("return " .. js_string:sub(start - 1, pos - 1))())
-- We consider the situation where no escaped chars were encountered separately,
-- and use the fastest possible return in this case.
--if 0 == #returnString then
-- return js_string:sub(start,pos-2)
--else
-- table.insert(returnString, js_string:sub(start,pos-2))
-- return table.concat(returnString,"");
--end
--return js_string:sub(start,pos-2)
end
local function read_num()
local start = pos
while pos <= #js_string do
local b = js_string:byte(pos)
if not tt_numeric[b] then break end
pos = pos + 1
end
return tonumber(js_string:sub(start - 1, pos - 1))
end
-- read_bool and read_null are both making an assumption that I have not tested:
-- I would expect that the string extraction is more expensive than actually
-- making manual comparision of the byte values
local function read_bool()
pos = pos + 3
local a, b, c, d = js_string:byte(pos - 3, pos)
if a == c_r and b == c_u and c == c_e then return true end
pos = pos + 1
if a ~= c_a or b ~= c_l or c ~= c_s or d ~= c_e then
error("Invalid boolean: " .. js_string:sub(math.max(1, pos - 5), pos + 5))
end
return false
end
-- same as read_bool: only last
local function read_null()
pos = pos + 3
local u, l1, l2 = js_string:byte(pos - 3, pos - 1)
if u == c_u and l1 == c_l and l2 == c_l then return nil end
error("Invalid value (expected null):" .. js_string:sub(pos - 4, pos - 1) ..
" (" .. js_string:byte(pos - 1) .. "=" .. js_string:sub(pos - 1, pos - 1) .. " / " .. c_l .. ")")
end
local read_object_value, read_object_key, read_array, read_value, read_comment
-- read a value depending on what token was returned, might require info what was used (in case of comments)
function read_value(t, fromt)
if t == tt_object_key then return read_object_key({}) end
if t == tt_array_seperator then return read_array({}) end
if t == tt_singlequote_string or
t == tt_doublequote_string then return read_string(t) end
if t == tt_numeric then return read_num() end
if t == tt_boolean then return read_bool() end
if t == tt_null then return read_null() end
if t == tt_comment_start then return read_value(read_comment(fromt)) end
error("unexpected termination - " .. js_string:sub(math.max(1, pos - 10), pos + 10))
end
-- read comments until something noncomment like surfaces, using the token reader which was
-- used when stumbling over this comment
function read_comment(fromt)
while true do
next_token(tt_comment_start)
while true do
local t = next_token(tt_comment_middle)
if next_byte() == c_slash then
local t = next_token(fromt)
if t ~= tt_comment_start then return t end
break
end
end
end
end
-- read arrays, empty array expected as o arg
function read_array(o, i)
--if not i then status "arr open" end
i = i or 1
-- loop until ...
while true do
o[i] = read_value(next_token(tt_array_seperator), tt_array_seperator)
local t = next_token(tt_array_value)
if t == tt_comment_start then
t = read_comment(tt_array_value)
end
if t == true then -- ... we found a terminator token
--status "arr close"
return o
end
i = i + 1
end
end
-- object value reading
function read_object_value(o)
local t = next_token(tt_object_value)
return read_value(t, tt_object_value)
end
-- object key reading, might also terminate the object
function read_object_key(o)
while true do
local t = next_token(tt_object_key)
if t == tt_comment_start then
t = read_comment(tt_object_key)
end
if t == true then return o end
if t == tt_object_key then return read_object_key(o) end
local k = read_string(t)
if next_token(tt_object_colon) == tt_comment_start then
t = read_comment(tt_object_colon)
end
local v = read_object_value(o)
o[k] = v
end
end
-- now let's read data from our string and pretend it's an object value
local r = read_object_value()
if pos <= #js_string then
-- not sure about what to do with dangling characters
--error("Dangling characters in JSON code ("..location()..")")
end
return r
end
end
return {
encode = encode,
decode = decode
}