local setmetatable, tonumber, tostring =
setmetatable, tonumber, tostring
local floor, inf =
math.floor, math.huge
local mininteger, tointeger =
math.mininteger or nil, math.tointeger or nil
local byte, char, find, gsub, match, sub =
string.byte, string.char, string.find, string.gsub, string.match, string.sub
local function _parse_error(pos, errmsg)
error("parse error at " .. pos .. ": " .. errmsg, 2)
end
local f_str_ctrl_pat
if _VERSION == "Lua 5.1" then
-- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly
f_str_ctrl_pat = '[^\32-\255]'
else
f_str_ctrl_pat = '[\0-\31]'
end
local type, unpack = type, table.unpack or unpack
local open = io.open
local _ENV = nil
local function nop() end
local function newparser(src, saxtbl)
local json, jsonnxt, rec_depth
local jsonlen, pos, acc = 0, 1, 0
-- `f` is the temporary for dispatcher[c] and
-- the dummy for the first return value of `find`
local dispatcher, f
-- initialize
if type(src) == 'string' then
json = src
jsonlen = #json
jsonnxt = function()
json = ''
jsonlen = 0
jsonnxt = nop
end
else
jsonnxt = function()
acc = acc + jsonlen
pos = 1
repeat
json = src()
if not json then
json = ''
jsonlen = 0
jsonnxt = nop
return
end
jsonlen = #json
until jsonlen > 0
end
jsonnxt()
end
local sax_startobject = saxtbl.startobject or nop
local sax_key = saxtbl.key or nop
local sax_endobject = saxtbl.endobject or nop
local sax_startarray = saxtbl.startarray or nop
local sax_endarray = saxtbl.endarray or nop
local sax_string = saxtbl.string or nop
local sax_number = saxtbl.number or nop
local sax_boolean = saxtbl.boolean or nop
local sax_null = saxtbl.null or nop
--[[
Helper
--]]
local function tryc()
local c = byte(json, pos)
if not c then
jsonnxt()
c = byte(json, pos)
end
return c
end
local function parse_error(errmsg)
return _parse_error(acc + pos, errmsg)
end
local function tellc()
return tryc() or parse_error("unexpected termination")
end
local function spaces() -- skip spaces and prepare the next char
while true do
pos = match(json, '^[ \n\r\t]*()', pos)
if pos <= jsonlen then
return
end
if jsonlen == 0 then
parse_error("unexpected termination")
end
jsonnxt()
end
end
--[[
Invalid
--]]
local function f_err()
parse_error('invalid value')
end
--[[
Constants
--]]
-- fallback slow constants parser
local function generic_constant(target, targetlen, ret, sax_f)
for i = 1, targetlen do
local c = tellc()
if byte(target, i) ~= c then
parse_error("invalid char")
end
pos = pos+1
end
return sax_f(ret)
end
-- null
local function f_nul()
if sub(json, pos, pos+2) == 'ull' then
pos = pos+3
return sax_null(nil)
end
return generic_constant('ull', 3, nil, sax_null)
end
-- false
local function f_fls()
if sub(json, pos, pos+3) == 'alse' then
pos = pos+4
return sax_boolean(false)
end
return generic_constant('alse', 4, false, sax_boolean)
end
-- true
local function f_tru()
if sub(json, pos, pos+2) == 'rue' then
pos = pos+3
return sax_boolean(true)
end
return generic_constant('rue', 3, true, sax_boolean)
end
--[[
Numbers
Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp)
is captured as a number and its conformance to the JSON spec is checked.
--]]
-- deal with non-standard locales
local radixmark = match(tostring(0.5), '[^0-9]')
local fixedtonumber = tonumber
if radixmark ~= '.' then
if find(radixmark, '%W') then
radixmark = '%' .. radixmark
end
fixedtonumber = function(s)
return tonumber(gsub(s, '.', radixmark))
end
end
local function number_error()
return parse_error('invalid number')
end
-- fallback slow parser
local function generic_number(mns)
local buf = {}
local i = 1
local is_int = true
local c = byte(json, pos)
pos = pos+1
local function nxt()
buf[i] = c
i = i+1
c = tryc()
pos = pos+1
end
if c == 0x30 then
nxt()
if c and 0x30 <= c and c < 0x3A then
number_error()
end
else
repeat nxt() until not (c and 0x30 <= c and c < 0x3A)
end
if c == 0x2E then
is_int = false
nxt()
if not (c and 0x30 <= c and c < 0x3A) then
number_error()
end
repeat nxt() until not (c and 0x30 <= c and c < 0x3A)
end
if c == 0x45 or c == 0x65 then
is_int = false
nxt()
if c == 0x2B or c == 0x2D then
nxt()
end
if not (c and 0x30 <= c and c < 0x3A) then
number_error()
end
repeat nxt() until not (c and 0x30 <= c and c < 0x3A)
end
if c and (0x41 <= c and c <= 0x5B or
0x61 <= c and c <= 0x7B or
c == 0x2B or c == 0x2D or c == 0x2E) then
number_error()
end
pos = pos-1
local num = char(unpack(buf))
num = fixedtonumber(num)
if mns then
num = -num
if num == mininteger and is_int then
num = mininteger
end
end
return sax_number(num)
end
-- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?`
local function f_zro(mns)
local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0
if num == '' then
if pos > jsonlen then
pos = pos - 1
return generic_number(mns)
end
if c == '' then
if mns then
return sax_number(-0.0)
end
return sax_number(0)
end
if c == 'e' or c == 'E' then
num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
if c == '' then
pos = pos + #num
if pos > jsonlen then
pos = pos - #num - 1
return generic_number(mns)
end
if mns then
return sax_number(-0.0)
end
return sax_number(0.0)
end
end
pos = pos-1
return generic_number(mns)
end
if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then
pos = pos-1
return generic_number(mns)
end
if c ~= '' then
if c == 'e' or c == 'E' then
num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
end
if c ~= '' then
pos = pos-1
return generic_number(mns)
end
end
pos = pos + #num
if pos > jsonlen then
pos = pos - #num - 1
return generic_number(mns)
end
c = fixedtonumber(num)
if mns then
c = -c
end
return sax_number(c)
end
-- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?`
local function f_num(mns)
pos = pos-1
local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos)
if byte(num, -1) == 0x2E then -- error if ended with period
return generic_number(mns)
end
if c ~= '' then
if c ~= 'e' and c ~= 'E' then
return generic_number(mns)
end
num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos)
if not num or c ~= '' then
return generic_number(mns)
end
end
pos = pos + #num
if pos > jsonlen then
pos = pos - #num
return generic_number(mns)
end
c = fixedtonumber(num)
if mns then
c = -c
if c == mininteger and not find(num, '[^0-9]') then
c = mininteger
end
end
return sax_number(c)
end
-- skip minus sign
local function f_mns()
local c = byte(json, pos) or tellc()
if c then
pos = pos+1
if c > 0x30 then
if c < 0x3A then
return f_num(true)
end
else
if c > 0x2F then
return f_zro(true)
end
end
end
parse_error("invalid number")
end
--[[
Strings
--]]
local f_str_hextbl = {
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
0x8, 0x9, inf, inf, inf, inf, inf, inf,
inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf,
inf, inf, inf, inf, inf, inf, inf, inf,
inf, inf, inf, inf, inf, inf, inf, inf,
inf, inf, inf, inf, inf, inf, inf, inf,
inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
__index = function()
return inf
end
}
setmetatable(f_str_hextbl, f_str_hextbl)
local f_str_escapetbl = {
['"'] = '"',
['\\'] = '\\',
['/'] = '/',
['b'] = '\b',
['f'] = '\f',
['n'] = '\n',
['r'] = '\r',
['t'] = '\t',
__index = function()
parse_error("invalid escape sequence")
end
}
setmetatable(f_str_escapetbl, f_str_escapetbl)
local function surrogate_first_error()
return parse_error("1st surrogate pair byte not continued by 2nd")
end
local f_str_surrogate_prev = 0
local function f_str_subst(ch, ucode)
if ch == 'u' then
local c1, c2, c3, c4, rest = byte(ucode, 1, 5)
ucode = f_str_hextbl[c1-47] * 0x1000 +
f_str_hextbl[c2-47] * 0x100 +
f_str_hextbl[c3-47] * 0x10 +
f_str_hextbl[c4-47]
if ucode ~= inf then
if ucode < 0x80 then -- 1byte
if rest then
return char(ucode, rest)
end
return char(ucode)
elseif ucode < 0x800 then -- 2bytes
c1 = floor(ucode / 0x40)
c2 = ucode - c1 * 0x40
c1 = c1 + 0xC0
c2 = c2 + 0x80
if rest then
return char(c1, c2, rest)
end
return char(c1, c2)
elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes
c1 = floor(ucode / 0x1000)
ucode = ucode - c1 * 0x1000
c2 = floor(ucode / 0x40)
c3 = ucode - c2 * 0x40
c1 = c1 + 0xE0
c2 = c2 + 0x80
c3 = c3 + 0x80
if rest then
return char(c1, c2, c3, rest)
end
return char(c1, c2, c3)
elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st
if f_str_surrogate_prev == 0 then
f_str_surrogate_prev = ucode
if not rest then
return ''
end
surrogate_first_error()
end
f_str_surrogate_prev = 0
surrogate_first_error()
else -- surrogate pair 2nd
if f_str_surrogate_prev ~= 0 then
ucode = 0x10000 +
(f_str_surrogate_prev - 0xD800) * 0x400 +
(ucode - 0xDC00)
f_str_surrogate_prev = 0
c1 = floor(ucode / 0x40000)
ucode = ucode - c1 * 0x40000
c2 = floor(ucode / 0x1000)
ucode = ucode - c2 * 0x1000
c3 = floor(ucode / 0x40)
c4 = ucode - c3 * 0x40
c1 = c1 + 0xF0
c2 = c2 + 0x80
c3 = c3 + 0x80
c4 = c4 + 0x80
if rest then
return char(c1, c2, c3, c4, rest)
end
return char(c1, c2, c3, c4)
end
parse_error("2nd surrogate pair byte appeared without 1st")
end
end
parse_error("invalid unicode codepoint literal")
end
if f_str_surrogate_prev ~= 0 then
f_str_surrogate_prev = 0
surrogate_first_error()
end
return f_str_escapetbl[ch] .. ucode
end
local function f_str(iskey)
local pos2 = pos
local newpos
local str = ''
local bs
while true do
while true do -- search '\' or '"'
newpos = find(json, '[\\"]', pos2)
if newpos then
break
end
str = str .. sub(json, pos, jsonlen)
if pos2 == jsonlen+2 then
pos2 = 2
else
pos2 = 1
end
jsonnxt()
if jsonlen == 0 then
parse_error("unterminated string")
end
end
if byte(json, newpos) == 0x22 then -- break if '"'
break
end
pos2 = newpos+2 -- skip '\<char>'
bs = true -- mark the existence of a backslash
end
str = str .. sub(json, pos, newpos-1)
pos = newpos+1
if find(str, f_str_ctrl_pat) then
parse_error("unescaped control string")
end
if bs then -- a backslash exists
-- We need to grab 4 characters after the escape char,
-- for encoding unicode codepoint to UTF-8.
-- As we need to ensure that every first surrogate pair byte is
-- immediately followed by second one, we grab upto 5 characters and
-- check the last for this purpose.
str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst)
if f_str_surrogate_prev ~= 0 then
f_str_surrogate_prev = 0
parse_error("1st surrogate pair byte not continued by 2nd")
end
end
if iskey then
return sax_key(str)
end
return sax_string(str)
end
--[[
Arrays, Objects
--]]
-- arrays
local function f_ary()
rec_depth = rec_depth + 1
if rec_depth > 1000 then
parse_error('too deeply nested json (> 1000)')
end
sax_startarray()
spaces()
if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty
pos = pos+1
else
local newpos
while true do
f = dispatcher[byte(json, pos)] -- parse value
pos = pos+1
f()
newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma
if newpos then
pos = newpos
else
newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket
if newpos then
pos = newpos
break
end
spaces() -- since the current chunk can be ended, skip spaces toward following chunks
local c = byte(json, pos)
pos = pos+1
if c == 0x2C then -- check comma again
spaces()
elseif c == 0x5D then -- check closing bracket again
break
else
parse_error("no closing bracket of an array")
end
end
if pos > jsonlen then
spaces()
end
end
end
rec_depth = rec_depth - 1
return sax_endarray()
end
-- objects
local function f_obj()
rec_depth = rec_depth + 1
if rec_depth > 1000 then
parse_error('too deeply nested json (> 1000)')
end
sax_startobject()
spaces()
if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty
pos = pos+1
else
local newpos
while true do
if byte(json, pos) ~= 0x22 then
parse_error("not key")
end
pos = pos+1
f_str(true) -- parse key
newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos) -- check colon
if newpos then
pos = newpos
else
spaces() -- read spaces through chunks
if byte(json, pos) ~= 0x3A then -- check colon again
parse_error("no colon after a key")
end
pos = pos+1
spaces()
end
if pos > jsonlen then
spaces()
end
f = dispatcher[byte(json, pos)]
pos = pos+1
f() -- parse value
newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma
if newpos then
pos = newpos
else
newpos = match(json, '^[ \n\r\t]*}()', pos) -- check closing bracket
if newpos then
pos = newpos
break
end
spaces() -- read spaces through chunks
local c = byte(json, pos)
pos = pos+1
if c == 0x2C then -- check comma again
spaces()
elseif c == 0x7D then -- check closing bracket again
break
else
parse_error("no closing bracket of an object")
end
end
if pos > jsonlen then
spaces()
end
end
end
rec_depth = rec_depth - 1
return sax_endobject()
end
--[[
The jump table to dispatch a parser for a value,
indexed by the code of the value's first char.
Key should be non-nil.
--]]
dispatcher = { [0] =
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_str, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_mns, f_err, f_err,
f_zro, f_num, f_num, f_num, f_num, f_num, f_num, f_num,
f_num, f_num, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_ary, f_err, f_err, f_err, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_fls, f_err,
f_err, f_err, f_err, f_err, f_err, f_err, f_nul, f_err,
f_err, f_err, f_err, f_err, f_tru, f_err, f_err, f_err,
f_err, f_err, f_err, f_obj, f_err, f_err, f_err, f_err,
}
--[[
public funcitons
--]]
local function run()
rec_depth = 0
spaces()
f = dispatcher[byte(json, pos)]
pos = pos+1
f()
end
local function read(n)
if n < 0 then
error("the argument must be non-negative")
end
local pos2 = (pos-1) + n
local str = sub(json, pos, pos2)
while pos2 > jsonlen and jsonlen ~= 0 do
jsonnxt()
pos2 = pos2 - (jsonlen - (pos-1))
str = str .. sub(json, pos, pos2)
end
if jsonlen ~= 0 then
pos = pos2+1
end
return str
end
local function tellpos()
return acc + pos
end
return {
run = run,
tryc = tryc,
read = read,
tellpos = tellpos,
}
end
local function newfileparser(fn, saxtbl)
local fp = open(fn)
local function gen()
local s
if fp then
s = fp:read(8192)
if not s then
fp:close()
fp = nil
end
end
return s
end
return newparser(gen, saxtbl)
end
return {
newparser = newparser,
newfileparser = newfileparser
}