local setmetatable, tonumber, tostring = setmetatable, tonumber, tostring local floor, inf = math.floor, math.huge local mininteger, tointeger = math.mininteger or nil, math.tointeger or nil local byte, char, find, gsub, match, sub = string.byte, string.char, string.find, string.gsub, string.match, string.sub local function _decode_error(pos, errmsg) error("parse error at " .. pos .. ": " .. errmsg, 2) end local f_str_ctrl_pat if _VERSION == "Lua 5.1" then -- use the cluttered pattern because lua 5.1 does not handle \0 in a pattern correctly f_str_ctrl_pat = '[^\32-\255]' else f_str_ctrl_pat = '[\0-\31]' end local _ENV = nil local function newdecoder() local json, pos, nullv, arraylen, rec_depth -- `f` is the temporary for dispatcher[c] and -- the dummy for the first return value of `find` local dispatcher, f --[[ Helper --]] local function decode_error(errmsg) return _decode_error(pos, errmsg) end --[[ Invalid --]] local function f_err() decode_error('invalid value') end --[[ Constants --]] -- null local function f_nul() if sub(json, pos, pos+2) == 'ull' then pos = pos+3 return nullv end decode_error('invalid value') end -- false local function f_fls() if sub(json, pos, pos+3) == 'alse' then pos = pos+4 return false end decode_error('invalid value') end -- true local function f_tru() if sub(json, pos, pos+2) == 'rue' then pos = pos+3 return true end decode_error('invalid value') end --[[ Numbers Conceptually, the longest prefix that matches to `[-+.0-9A-Za-z]+` (in regexp) is captured as a number and its conformance to the JSON spec is checked. --]] -- deal with non-standard locales local radixmark = match(tostring(0.5), '[^0-9]') local fixedtonumber = tonumber if radixmark ~= '.' then if find(radixmark, '%W') then radixmark = '%' .. radixmark end fixedtonumber = function(s) return tonumber(gsub(s, '.', radixmark)) end end local function number_error() return decode_error('invalid number') end -- `0(\.[0-9]*)?([eE][+-]?[0-9]*)?` local function f_zro(mns) local num, c = match(json, '^(%.?[0-9]*)([-+.A-Za-z]?)', pos) -- skipping 0 if num == '' then if c == '' then if mns then return -0.0 end return 0 end if c == 'e' or c == 'E' then num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) if c == '' then pos = pos + #num if mns then return -0.0 end return 0.0 end end number_error() end if byte(num) ~= 0x2E or byte(num, -1) == 0x2E then number_error() end if c ~= '' then if c == 'e' or c == 'E' then num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) end if c ~= '' then number_error() end end pos = pos + #num c = fixedtonumber(num) if mns then c = -c end return c end -- `[1-9][0-9]*(\.[0-9]*)?([eE][+-]?[0-9]*)?` local function f_num(mns) pos = pos-1 local num, c = match(json, '^([0-9]+%.?[0-9]*)([-+.A-Za-z]?)', pos) if byte(num, -1) == 0x2E then -- error if ended with period number_error() end if c ~= '' then if c ~= 'e' and c ~= 'E' then number_error() end num, c = match(json, '^([^eE]*[eE][-+]?[0-9]+)([-+.A-Za-z]?)', pos) if not num or c ~= '' then number_error() end end pos = pos + #num c = fixedtonumber(num) if mns then c = -c if c == mininteger and not find(num, '[^0-9]') then c = mininteger end end return c end -- skip minus sign local function f_mns() local c = byte(json, pos) if c then pos = pos+1 if c > 0x30 then if c < 0x3A then return f_num(true) end else if c > 0x2F then return f_zro(true) end end end decode_error('invalid number') end --[[ Strings --]] local f_str_hextbl = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, inf, inf, inf, inf, inf, inf, inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, __index = function() return inf end } setmetatable(f_str_hextbl, f_str_hextbl) local f_str_escapetbl = { ['"'] = '"', ['\\'] = '\\', ['/'] = '/', ['b'] = '\b', ['f'] = '\f', ['n'] = '\n', ['r'] = '\r', ['t'] = '\t', __index = function() decode_error("invalid escape sequence") end } setmetatable(f_str_escapetbl, f_str_escapetbl) local function surrogate_first_error() return decode_error("1st surrogate pair byte not continued by 2nd") end local f_str_surrogate_prev = 0 local function f_str_subst(ch, ucode) if ch == 'u' then local c1, c2, c3, c4, rest = byte(ucode, 1, 5) ucode = f_str_hextbl[c1-47] * 0x1000 + f_str_hextbl[c2-47] * 0x100 + f_str_hextbl[c3-47] * 0x10 + f_str_hextbl[c4-47] if ucode ~= inf then if ucode < 0x80 then -- 1byte if rest then return char(ucode, rest) end return char(ucode) elseif ucode < 0x800 then -- 2bytes c1 = floor(ucode / 0x40) c2 = ucode - c1 * 0x40 c1 = c1 + 0xC0 c2 = c2 + 0x80 if rest then return char(c1, c2, rest) end return char(c1, c2) elseif ucode < 0xD800 or 0xE000 <= ucode then -- 3bytes c1 = floor(ucode / 0x1000) ucode = ucode - c1 * 0x1000 c2 = floor(ucode / 0x40) c3 = ucode - c2 * 0x40 c1 = c1 + 0xE0 c2 = c2 + 0x80 c3 = c3 + 0x80 if rest then return char(c1, c2, c3, rest) end return char(c1, c2, c3) elseif 0xD800 <= ucode and ucode < 0xDC00 then -- surrogate pair 1st if f_str_surrogate_prev == 0 then f_str_surrogate_prev = ucode if not rest then return '' end surrogate_first_error() end f_str_surrogate_prev = 0 surrogate_first_error() else -- surrogate pair 2nd if f_str_surrogate_prev ~= 0 then ucode = 0x10000 + (f_str_surrogate_prev - 0xD800) * 0x400 + (ucode - 0xDC00) f_str_surrogate_prev = 0 c1 = floor(ucode / 0x40000) ucode = ucode - c1 * 0x40000 c2 = floor(ucode / 0x1000) ucode = ucode - c2 * 0x1000 c3 = floor(ucode / 0x40) c4 = ucode - c3 * 0x40 c1 = c1 + 0xF0 c2 = c2 + 0x80 c3 = c3 + 0x80 c4 = c4 + 0x80 if rest then return char(c1, c2, c3, c4, rest) end return char(c1, c2, c3, c4) end decode_error("2nd surrogate pair byte appeared without 1st") end end decode_error("invalid unicode codepoint literal") end if f_str_surrogate_prev ~= 0 then f_str_surrogate_prev = 0 surrogate_first_error() end return f_str_escapetbl[ch] .. ucode end -- caching interpreted keys for speed local f_str_keycache = setmetatable({}, {__mode="v"}) local function f_str(iskey) local newpos = pos local tmppos, c1, c2 repeat newpos = find(json, '"', newpos, true) -- search '"' if not newpos then decode_error("unterminated string") end tmppos = newpos-1 newpos = newpos+1 c1, c2 = byte(json, tmppos-1, tmppos) if c2 == 0x5C and c1 == 0x5C then -- skip preceding '\\'s repeat tmppos = tmppos-2 c1, c2 = byte(json, tmppos-1, tmppos) until c2 ~= 0x5C or c1 ~= 0x5C tmppos = newpos-2 end until c2 ~= 0x5C -- leave if '"' is not preceded by '\' local str = sub(json, pos, tmppos) pos = newpos if iskey then -- check key cache tmppos = f_str_keycache[str] -- reuse tmppos for cache key/val if tmppos then return tmppos end tmppos = str end if find(str, f_str_ctrl_pat) then decode_error("unescaped control string") end if find(str, '\\', 1, true) then -- check whether a backslash exists -- We need to grab 4 characters after the escape char, -- for encoding unicode codepoint to UTF-8. -- As we need to ensure that every first surrogate pair byte is -- immediately followed by second one, we grab upto 5 characters and -- check the last for this purpose. str = gsub(str, '\\(.)([^\\]?[^\\]?[^\\]?[^\\]?[^\\]?)', f_str_subst) if f_str_surrogate_prev ~= 0 then f_str_surrogate_prev = 0 decode_error("1st surrogate pair byte not continued by 2nd") end end if iskey then -- commit key cache f_str_keycache[tmppos] = str end return str end --[[ Arrays, Objects --]] -- array local function f_ary() rec_depth = rec_depth + 1 if rec_depth > 1000 then decode_error('too deeply nested json (> 1000)') end local ary = {} pos = match(json, '^[ \n\r\t]*()', pos) local i = 0 if byte(json, pos) == 0x5D then -- check closing bracket ']' which means the array empty pos = pos+1 else local newpos = pos repeat i = i+1 f = dispatcher[byte(json,newpos)] -- parse value pos = newpos+1 ary[i] = f() newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) -- check comma until not newpos newpos = match(json, '^[ \n\r\t]*%]()', pos) -- check closing bracket if not newpos then decode_error("no closing bracket of an array") end pos = newpos end if arraylen then -- commit the length of the array if `arraylen` is set ary[0] = i end rec_depth = rec_depth - 1 return ary end -- objects local function f_obj() rec_depth = rec_depth + 1 if rec_depth > 1000 then decode_error('too deeply nested json (> 1000)') end local obj = {} pos = match(json, '^[ \n\r\t]*()', pos) if byte(json, pos) == 0x7D then -- check closing bracket '}' which means the object empty pos = pos+1 else local newpos = pos repeat if byte(json, newpos) ~= 0x22 then -- check '"' decode_error("not key") end pos = newpos+1 local key = f_str(true) -- parse key -- optimized for compact json -- c1, c2 == ':', or -- c1, c2, c3 == ':', ' ', f = f_err local c1, c2, c3 = byte(json, pos, pos+3) if c1 == 0x3A then if c2 ~= 0x20 then f = dispatcher[c2] newpos = pos+2 else f = dispatcher[c3] newpos = pos+3 end end if f == f_err then -- read a colon and arbitrary number of spaces newpos = match(json, '^[ \n\r\t]*:[ \n\r\t]*()', pos) if not newpos then decode_error("no colon after a key") end f = dispatcher[byte(json, newpos)] newpos = newpos+1 end pos = newpos obj[key] = f() -- parse value newpos = match(json, '^[ \n\r\t]*,[ \n\r\t]*()', pos) until not newpos newpos = match(json, '^[ \n\r\t]*}()', pos) if not newpos then decode_error("no closing bracket of an object") end pos = newpos end rec_depth = rec_depth - 1 return obj end --[[ The jump table to dispatch a parser for a value, indexed by the code of the value's first char. Nil key means the end of json. --]] dispatcher = { [0] = f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_str, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_mns, f_err, f_err, f_zro, f_num, f_num, f_num, f_num, f_num, f_num, f_num, f_num, f_num, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_ary, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_fls, f_err, f_err, f_err, f_err, f_err, f_err, f_err, f_nul, f_err, f_err, f_err, f_err, f_err, f_tru, f_err, f_err, f_err, f_err, f_err, f_err, f_obj, f_err, f_err, f_err, f_err, __index = function() decode_error("unexpected termination") end } setmetatable(dispatcher, dispatcher) --[[ run decoder --]] local function decode(json_, pos_, nullv_, arraylen_) json, pos, nullv, arraylen = json_, pos_, nullv_, arraylen_ rec_depth = 0 pos = match(json, '^[ \n\r\t]*()', pos) f = dispatcher[byte(json, pos)] pos = pos+1 local v = f() if pos_ then return v, pos else f, pos = find(json, '^[ \n\r\t]*', pos) if pos ~= #json then decode_error('json ended') end return v end end return decode end return newdecoder