1042 lines
38 KiB
Lua
1042 lines
38 KiB
Lua
--[[
|
|
LPEGLJ
|
|
lpvm.lua
|
|
Virtual machine
|
|
Copyright (C) 2014 Rostislav Sacek.
|
|
based on LPeg v1.0 - PEG pattern matching for Lua
|
|
Lua.org & PUC-Rio written by Roberto Ierusalimschy
|
|
http://www.inf.puc-rio.br/~roberto/lpeg/
|
|
|
|
** Permission is hereby granted, free of charge, to any person obtaining
|
|
** a copy of this software and associated documentation files (the
|
|
** "Software"), to deal in the Software without restriction, including
|
|
** without limitation the rights to use, copy, modify, merge, publish,
|
|
** distribute, sublicense, and/or sell copies of the Software, and to
|
|
** permit persons to whom the Software is furnished to do so, subject to
|
|
** the following conditions:
|
|
**
|
|
** The above copyright notice and this permission notice shall be
|
|
** included in all copies or substantial portions of the Software.
|
|
**
|
|
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
**
|
|
** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
|
|
--]]
|
|
|
|
local ffi = require "ffi"
|
|
local lpcap = require "lpcap"
|
|
--[[ Only for debug purpose
|
|
local lpprint = require"lpprint"
|
|
--]]
|
|
|
|
local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
|
|
|
|
-- {======================================================
|
|
-- Virtual Machine
|
|
-- =======================================================
|
|
|
|
-- Interpret the result of a dynamic capture: false -> fail;
-- true -> keep current position; number -> next position.
-- Return the new subject position. 'fr' is the first value returned by
-- the capture function; 'curr' is the current subject position; 'limit'
-- is the subject's size.
|
|
|
|
-- Look-behind limits: the predicate limit and the largest offset a full
-- capture can encode.
local MAXBEHINDPREDICATE, MAXOFF = 255, 0xF
-- Furthest distance before the current position that may still be needed.
local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF)
-- Default maximum size for the call/backtrack stack.
local INITBACK = 400
|
|
|
|
-- Virtual machine opcodes (values must match the code generator).
local IAny           = 0  -- fail if there is no character left
local IChar          = 1  -- fail if char ~= val
local ISet           = 2  -- fail if char is not in set 'val'
local ITestAny       = 3  -- if there is no character left, jump to 'offset'
local ITestChar      = 4  -- if char ~= val, jump to 'offset'
local ITestSet       = 5  -- if char is not in set 'val', jump to 'offset'
local ISpan          = 6  -- consume a span of chars belonging to set 'val'
local IBehind        = 7  -- step back 'val' characters (fail if not possible)
local IRet           = 8  -- return from a rule
local IEnd           = 9  -- end of pattern
local IChoice        = 10 -- push a choice; the next failure jumps to 'offset'
local IJmp           = 11 -- jump to 'offset'
local ICall          = 12 -- call the rule at 'offset'
local IOpenCall      = 13 -- call rule number 'offset' (must be closed to an ICall)
local ICommit        = 14 -- pop the choice and jump to 'offset'
local IPartialCommit = 15 -- update the top choice to the current position and jump
local IBackCommit    = 16 -- "fails" but jumps to its own 'offset'
local IFailTwice     = 17 -- pop one choice and then fail
local IFail          = 18 -- restore the saved choice state and jump to its offset
local IGiveup        = 19 -- internal use
local IFullCapture   = 20 -- complete capture of the last 'off' chars
local IOpenCapture   = 21 -- start a capture
local ICloseCapture  = 22 -- close a capture
local ICloseRunTime  = 23 -- close a match-time (runtime) capture
|
|
|
|
-- Capture kinds stored in CAPTURE.kind.
local Cclose    = 0
local Cposition = 1
local Cconst    = 2
local Cbackref  = 3
local Carg      = 4
local Csimple   = 5
local Ctable    = 6
local Cfunction = 7
local Cquery    = 8
local Cstring   = 9
local Cnum      = 10
local Csubst    = 11
local Cfold     = 12
local Cruntime  = 13
local Cgroup    = 14
|
|
|
|
-- Bit mask tested against an instruction's 'val' to fill CAPTURE.candelete.
local BCapcandelete = 0x30000

-- Tunable limits and switches (changed through the setters exported below).
local maxstack = INITBACK       -- upper bound for the backtrack stack size
local maxcapturedefault = 100   -- initial size of a capture array
local maxmemo = 1000            -- calls recorded before the memo tables rotate
local usememoization = false
local trace = false

-- Sentinels: FAIL is the "no instruction" program counter; the others are
-- stored in STACK.X / STACK.memos to tag the kind of stack entry.
local FAIL = -1
local LRFAIL = -1
local VOID = -2
local CHOICE = -3
local CALL = -4
|
|
|
|
-- C-side data layout shared with the rest of LPegLJ, plus the libc
-- allocator entry points used by lp_load.
ffi.cdef [[
    typedef struct {
        int code;
        int val;
        int offset;
        int aux;
    } PATTERN_ELEMENT;
    typedef struct {
        int allocsize;
        int size;
        PATTERN_ELEMENT *p;
    } PATTERN;
    typedef struct {
        int tag;
        int val;
        int ps;
        int cap;
    } TREEPATTERN_ELEMENT;
    typedef struct {
        int id;
        int treesize;
        PATTERN *code;
        TREEPATTERN_ELEMENT p[?];
    } TREEPATTERN;

    typedef struct {
        double s;
        double X;
        double memos;
        int p;
        int caplevel;
        int pA;
        int valuetabletop;
    } STACK;

    typedef struct {
        double s;
        int siz;
        int idx;
        int kind;
        int candelete;
    } CAPTURE;

    void *malloc( size_t size );
    void free( void *memblock );
    void *realloc( void *memblock, size_t size );
]]

-- Cached ctype handles.
local treepatternelement = ffi.typeof("TREEPATTERN_ELEMENT")
local treepattern = ffi.typeof("TREEPATTERN")
local patternelement = ffi.typeof("PATTERN_ELEMENT")
local pattern = ffi.typeof("PATTERN")
local settype = ffi.typeof("int32_t[8]") -- 256-bit character set
|
|
|
|
-- Interpret the first value returned by a match-time (dynamic) capture.
--   fr             value returned by the capture function
--   curr           current subject position
--   limit          largest valid position, or nil/false when it is unknown
--   checkstreamlen optional function(index) used instead of 'limit' to
--                  check that a position is available
-- Returns FAIL for a false/nil result, 'curr' for true, otherwise the
-- numeric position. Raises "invalid position returned by match-time
-- capture" when the result is not a number or lies outside the range.
local function resdyncaptures(fr, curr, limit, checkstreamlen)
    -- false value?
    if not fr then
        return FAIL -- and fail
    end
    -- true?
    if fr == true then
        return curr -- keep current position
    end
    -- Anything that is not a number (table, function, non-numeric string)
    -- is reported as an invalid position instead of leaking a raw
    -- "attempt to compare" error out of the comparisons below.
    local res = tonumber(fr) -- new position
    if not res
            or res < curr
            or (limit and res > limit)
            or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then
        error("invalid position returned by match-time capture", 0)
    end
    return res
end
|
|
|
|
|
|
-- Add capture values returned by a dynamic capture to the capture list
|
|
-- 'base', nested inside a group capture. 'fd' indexes the first capture
|
|
-- value, 'n' is the number of values (at least 1).
|
|
|
|
-- Append the values returned by a dynamic capture to the capture list.
--   s          subject position recorded in every new entry
--   base       capture array; base[index] must be the open Cgroup entry
--   index      position of that group entry
--   n          number of values to add
--   fd         result list; fd[1] is the position, values start at fd[2]
--   valuetable table receiving the values (entries store the index in idx)
local function adddyncaptures(s, base, index, n, fd, valuetable)
    local group = base[index]
    -- Cgroup capture is already there
    assert(group.kind == Cgroup and group.siz == 0)
    group.idx = 0 -- make it an anonymous group

    local slot = index + 1
    base[slot] = {} -- reset the slot before filling it
    -- add runtime captures
    for k = 1, n do
        local entry = base[slot]
        entry.kind = Cruntime
        entry.siz = 1 -- mark it as closed
        local valueindex = #valuetable + 1
        valuetable[valueindex] = fd[k + 1]
        entry.idx = valueindex -- index of the capture value in valuetable
        entry.s = s
        slot = slot + 1
        base[slot] = {}
    end

    -- here slot == index + n + 1: the entry that closes the group
    local closing = base[slot]
    closing.kind = Cclose
    closing.siz = 1
    closing.s = s
    base[slot + 1] = {}
end
|
|
|
|
|
|
-- Opcode interpreter
|
|
|
|
-- Run the compiled pattern 'op' against a subject.
--   stream     truthy: the subject arrives in chunks ('o' is the first one)
--              and the function is expected to run inside a coroutine; it
--              yields 1 followed by any captures that are already complete
--              whenever it needs more input, and is resumed with
--              (nextchunk, last)
--   last       stream mode: true when no more data will follow
--   o          subject string (stream mode: first chunk)
--   s          1-based start position
--   op         compiled pattern (op.p = instruction array, op.size = length)
--   valuetable values referenced by instructions; runtime captures append to it
--   ...        extra arguments handed to the capture machinery
-- Returns -1 when the match fails, otherwise 0 followed by the values
-- produced by lpcap.getcaptures.
local function match(stream, last, o, s, op, valuetable, ...)
    local arg = { ... }
    local argcount = select('#', ...)
    local len = #o
    local ptr = ffi.cast('const unsigned char*', o)
    s = s - 1 -- the VM works with 0-based positions
    local stackptr = 0 -- point to first empty slot in stack
    local captop = 0 -- point to first empty slot in captures
    local STACK = ffi.new("STACK[?]", INITBACK)
    local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
    -- one capture array per active left-recursive call (see ICall / IRet)
    local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } }
    local capturestackptr = #CAPTURESTACK
    local maxcapture = maxcapturedefault
    local stacklimit = INITBACK
    local L = {} -- left-recursion table, keyed by pA + s * maxpointer
    local Memo1, Memo2 = {}, {} -- memoization tables, same key scheme
    local memoind = 0
    -- power of two >= op.size, used to combine (instruction, position) keys
    local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2))
    local nocapturereleased = true

    local p = 0 -- current instruction
    local streambufsize = 2 ^ 8
    local streambufsizemask = streambufsize - 1 -- faster modulo
    local streambufs = {} -- stream chunks, keyed by (buffer start offset + 1)
    local streambufoffset = 0 -- number of stream bytes received so far
    local streamstartbuffer = 0
    local streambufferscount = 0
    local level = -1 -- trace indentation

    -- Drop stream buffers that lie entirely before the oldest position
    -- still referenced by the current position, the stack or the captures.
    local function deletestreambuffers()
        local min = s
        for i = stackptr - 1, 0, -1 do
            local val = STACK[i].s
            if val >= 0 then
                min = math.min(val, min)
            end
        end

        for i = captop - 1, 0, -1 do
            local val = CAPTURE[i].s
            if val >= 0 then
                min = math.min(val, min)
            end
        end
        for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do
            -- max behind for full capture and max behind for Look-behind predicate
            if i + streambufsize + MAXBEHIND < min then
                streambufs[i] = nil
                streambufferscount = streambufferscount - 1
            else
                streamstartbuffer = i - 1
                break
            end
        end
    end

    -- Append 'chunk' to the stream buffers, filling the partially used
    -- last buffer first and then allocating new ones as needed.
    local function addstreamdata(chunk)
        local len = #chunk
        local srcoffset = 0
        if streambufferscount > 128 then
            deletestreambuffers()
        end
        repeat
            local offset = bit.band(streambufoffset, streambufsizemask)
            if offset > 0 then
                local index = streambufoffset - offset + 1
                local count = math.min(len, streambufsize - offset)
                -- slice exactly 'count' bytes (the copy length)
                ffi.copy(streambufs[index] + offset, chunk:sub(srcoffset + 1, srcoffset + count), count)
                len = len - count
                srcoffset = srcoffset + count
                streambufoffset = streambufoffset + count
            end
            if len > 0 then
                local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1
                local buf = ffi.new('unsigned char[?]', streambufsize)
                streambufferscount = streambufferscount + 1
                streambufs[index] = buf
                local count = math.min(len, streambufsize)
                ffi.copy(buf, chunk:sub(srcoffset + 1, srcoffset + count), count)
                len = len - count
                srcoffset = srcoffset + count
                streambufoffset = streambufoffset + count
            end
            if streambufoffset >= 2 ^ 47 then
                error("too big input stream", 0)
            end
        until len == 0
    end

    -- Byte at 0-based stream position 's'.
    local function getstreamchar(s)
        local offset = bit.band(s, streambufsizemask)
        local index = s - offset + 1
        return streambufs[index][offset]
    end

    local checkstreamlen -- forward declaration (mutually recursive helpers)

    -- Substring [st, en] (1-based) of the stream; negative bounds count
    -- from the end and are resolved only once the stream is complete.
    local function getstreamstring(st, en)
        -- TODO Optimalize access
        local str = {}
        local i = st >= 0 and st or 1
        local to = en >= 0 and en or math.huge
        while true do
            if i > to then break end
            if not checkstreamlen(i - 1) then return end
            if last and (st < 0 or en < 0) then
                for j = i, streambufoffset do
                    str[#str + 1] = string.char(getstreamchar(j - 1))
                end
                en = en < 0 and streambufoffset + en + 1 or en
                en = st > 0 and en - st + 1 or en
                st = st < 0 and streambufoffset + st + 1 or 1
                return table.concat(str):sub(st, en)
            else
                str[#str + 1] = string.char(getstreamchar(i - 1))
                i = i + 1
            end
        end
        return table.concat(str)
    end

    -- True when 0-based position 'index' is available in the stream.
    -- When it is not and more data may come, flushes the captures that
    -- can already be evaluated, yields them and waits for the next chunk.
    function checkstreamlen(index)
        local str
        while true do
            if index < streambufoffset then
                return true
            else
                if last then
                    s = streambufoffset
                    return false
                end
                -- only captures below every pending choice are final
                local max = captop
                for i = stackptr - 1, 0, -1 do
                    local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1
                    if val >= 0 then
                        max = math.min(val, max)
                    end
                end
                local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount))
                if n > 0 then
                    for i = stackptr - 1, 0, -1 do
                        local val = STACK[i].caplevel
                        if val > 0 then
                            STACK[i].caplevel = STACK[i].caplevel - n
                        end
                    end
                    captop = captop - n
                end
                if outindex > 0 then
                    nocapturereleased = false
                end
                str, last = coroutine.yield(1, unpack(out, 1, outindex))
                addstreamdata(str)
            end
        end
    end

    -- Double the size of the current capture array.
    local function doublecapture()
        maxcapture = maxcapture * 2
        local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture)
        ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop)
        CAPTURE = NEWCAPTURE
        CAPTURESTACK[capturestackptr].capture = CAPTURE
        CAPTURESTACK[capturestackptr].maxcapture = maxcapture
    end

    -- Finish the capture entry at 'captop' from the current instruction
    -- and advance to the next instruction.
    local function pushcapture()
        CAPTURE[captop].idx = op.p[p].offset
        CAPTURE[captop].kind = band(op.p[p].val, 0x0f)
        CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0
        captop = captop + 1
        p = p + 1
        if captop >= maxcapture then
            doublecapture()
        end
    end

    local function traceenter(typ, par)
        level = level + (par or 0)
        io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux]))
    end

    local function traceleave(inst)
        io.write(('%s- %s\n'):format((' '):rep(level), valuetable[op.p[inst].aux]))
        level = level - 1
    end

    local function tracematch(typ, start, par, from, to, inst, extra, ...)
        local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...)
        local capstr = {}
        for i = 1, capscount do capstr[i] = tostring(caps[i]) end
        extra = extra and '(' .. extra .. ')' or ''
        io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra,
            o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " ")))
        level = level - par
    end

    -- Pattern failed: unwind the stack to the nearest choice (or to a
    -- left-recursive call that already has a result) and resume there.
    -- Sets p = FAIL when the stack is exhausted.
    local function fail()
        -- pattern failed: try to backtrack
        local X
        repeat -- remove pending calls
            stackptr = stackptr - 1
            if stackptr == -1 then
                p = FAIL
                return
            end
            s = STACK[stackptr].s
            X = STACK[stackptr].X
            if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then
                Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
                Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
            end
            -- lvar.2 rest
            if X == LRFAIL then
                CAPTURESTACK[capturestackptr] = nil
                capturestackptr = capturestackptr - 1
                CAPTURE = CAPTURESTACK[capturestackptr].capture
                maxcapture = CAPTURESTACK[capturestackptr].maxcapture
                L[STACK[stackptr].pA + s * maxpointer] = nil
            end
            if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end
        until X == CHOICE or X >= 0
        p = STACK[stackptr].p
        -- drop values added to valuetable after this stack entry was saved
        for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
            table.remove(valuetable)
        end
        -- inc.2
        if X >= 0 then
            s = X
            capturestackptr = capturestackptr - 1
            CAPTURE = CAPTURESTACK[capturestackptr].capture
            captop = CAPTURESTACK[capturestackptr].captop
            maxcapture = CAPTURESTACK[capturestackptr].maxcapture
            local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit
            while captop + capture.captop >= maxcapture do
                doublecapture()
            end
            ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
            captop = captop + capture.captop
            if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end
            CAPTURESTACK[capturestackptr + 1] = nil
            L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
        else
            captop = STACK[stackptr].caplevel
        end
    end

    -- Grow the backtrack stack (bounded by 'maxstack').
    local function doublestack()
        if stackptr >= maxstack then
            error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0)
        end
        stacklimit = stacklimit * 2
        stacklimit = (stacklimit > maxstack) and maxstack or stacklimit
        local NEWSTACK = ffi.new("STACK[?]", stacklimit)
        ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr)
        STACK = NEWSTACK
    end

    if stream then
        -- stream mode: the subject lives in the stream buffers only
        addstreamdata(o)
        len = nil
        o = nil
        ptr = nil
    end
    while true do
        --[[ Only for debug
        io.write(("s: |%s| stck:%d, caps:%d  \n"):format(s + 1, stackptr, captop))
        if p ~= FAIL then
        lpprint.printinst(op.p, p, valuetable)
        lpprint.printcaplist(CAPTURE, captop, valuetable)
        end
        --]]
        if p == FAIL then return -1 end
        local code = op.p[p].code
        if code == IEnd then
            CAPTURE[captop].kind = Cclose
            CAPTURE[captop].s = -1
            return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...)
        elseif code == IRet then
            if STACK[stackptr - 1].X == CALL then
                stackptr = stackptr - 1
                if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) end
                p = STACK[stackptr].p
                if usememoization and STACK[stackptr].memos ~= VOID then
                    -- remember the end position and the captures of this call
                    local dif = captop - STACK[stackptr].caplevel
                    local caps
                    if dif > 0 then
                        caps = ffi.new("CAPTURE[?]", dif)
                        ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE'))
                    end
                    local val = { s, dif, caps }
                    Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
                    Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
                end
            else
                local X = STACK[stackptr - 1].X
                -- lvar.1 inc.1
                if X == LRFAIL or s > X then
                    -- the left-recursive rule matched more than before:
                    -- record the result and try to grow it again
                    if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end
                    STACK[stackptr - 1].X = s
                    p = STACK[stackptr - 1].pA
                    s = STACK[stackptr - 1].s
                    local lambda = L[p + s * maxpointer]
                    lambda.level = lambda.level + 1
                    lambda.X = STACK[stackptr - 1].X
                    STACK[stackptr - 1].caplevel = captop
                    STACK[stackptr - 1].valuetabletop = #valuetable
                    CAPTURESTACK[capturestackptr].captop = captop
                    lambda.capturecommit = CAPTURESTACK[capturestackptr]
                    captop = 0
                    CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
                    CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault }
                    maxcapture = maxcapturedefault
                else
                    -- inc.3
                    stackptr = stackptr - 1
                    p = STACK[stackptr].p
                    s = STACK[stackptr].X
                    for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
                        table.remove(valuetable)
                    end
                    local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer]
                    capturestackptr = capturestackptr - 1
                    CAPTURE = CAPTURESTACK[capturestackptr].capture
                    captop = CAPTURESTACK[capturestackptr].captop
                    maxcapture = CAPTURESTACK[capturestackptr].maxcapture
                    local capture = lambda.capturecommit
                    while captop + capture.captop >= maxcapture do
                        doublecapture()
                    end
                    ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
                    captop = captop + capture.captop
                    if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end
                    CAPTURESTACK[capturestackptr + 1] = nil
                    L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
                end
            end
        elseif code == IBehind then
            local n = op.p[p].val
            if n > s then
                fail()
            else
                s = s - n
                p = p + 1
            end
        elseif code == IJmp then
            if trace and op.p[p].aux ~= 0 then traceenter('TC') end
            p = p + op.p[p].offset
        elseif code == IChoice then
            if stackptr == stacklimit then
                doublestack()
            end
            STACK[stackptr].X = CHOICE
            STACK[stackptr].p = p + op.p[p].offset
            STACK[stackptr].s = s
            STACK[stackptr].caplevel = captop
            STACK[stackptr].valuetabletop = #valuetable
            stackptr = stackptr + 1
            p = p + 1
        elseif code == ICall then
            if stackptr == stacklimit then
                doublestack()
            end
            local k = bit.band(op.p[p].val, 0xffff)
            if k == 0 then
                -- ordinary (non left-recursive) call
                local pA = p + op.p[p].offset
                local memo = Memo1[pA + s * maxpointer]
                if usememoization and memo then
                    if trace then traceenter('M', 1) end
                    if memo == FAIL then
                        if trace then traceleave(p) end
                        fail()
                    else
                        local dif = memo[2]
                        if dif > 0 then
                            while captop + dif >= maxcapture do
                                doublecapture()
                            end
                            local caps = memo[3]
                            ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE'))
                            captop = captop + dif
                        end
                        if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) end
                        s = memo[1]
                        p = p + 1
                    end
                else
                    if trace then traceenter('', 1) end
                    STACK[stackptr].X = CALL
                    STACK[stackptr].s = s
                    STACK[stackptr].p = p + 1 -- save return address
                    STACK[stackptr].pA = pA
                    STACK[stackptr].memos = s
                    STACK[stackptr].caplevel = captop
                    stackptr = stackptr + 1
                    p = pA
                    if usememoization and not memo then
                        memoind = memoind + 1
                        if memoind > maxmemo then
                            -- rotate the memo tables to bound memory use
                            memoind = 0
                            Memo1 = Memo2
                            Memo2 = {}
                        end
                    end
                end
            else
                -- left-recursive call with precedence level k
                local pA = p + op.p[p].offset
                local X = L[pA + s * maxpointer]
                -- lvar.1 lvar.2
                if not X then
                    if trace then traceenter('', 1) end
                    CAPTURESTACK[capturestackptr].captop = captop
                    local capture = ffi.new("CAPTURE[?]", maxcapturedefault)
                    capturestackptr = capturestackptr + 1
                    CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault }
                    CAPTURE = capture
                    maxcapture = maxcapturedefault
                    captop = 0
                    L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 }
                    STACK[stackptr].p = p + 1
                    STACK[stackptr].pA = pA
                    STACK[stackptr].s = s
                    STACK[stackptr].X = LRFAIL
                    stackptr = stackptr + 1
                    p = pA
                elseif X.X == LRFAIL or k < X.k then
                    -- lvar.3 lvar.5
                    fail()
                else
                    -- lvar.4
                    local capture = X.capturecommit
                    while captop + capture.captop >= maxcapture do
                        doublecapture()
                    end
                    ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
                    captop = captop + capture.captop
                    p = p + 1
                    s = X.X
                end
            end
        elseif code == ICommit then
            stackptr = stackptr - 1
            p = p + op.p[p].offset
        elseif code == IPartialCommit then
            STACK[stackptr - 1].s = s
            STACK[stackptr - 1].caplevel = captop
            STACK[stackptr - 1].valuetabletop = #valuetable
            p = p + op.p[p].offset
        elseif code == IBackCommit then
            stackptr = stackptr - 1
            s = STACK[stackptr].s
            captop = STACK[stackptr].caplevel
            for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
                table.remove(valuetable)
            end
            p = p + op.p[p].offset
        elseif code == IFailTwice then
            stackptr = stackptr - 1
            fail()
        elseif code == IFail then
            fail()
        elseif code == ICloseRunTime then
            -- invalidate memo
            for i = 0, stackptr - 1 do
                STACK[i].memos = VOID
            end
            local cs = {}
            cs.s = o
            cs.stream = getstreamstring
            cs.ocap = CAPTURE
            cs.ptop = arg
            cs.ptopcount = argcount
            local out = { outindex = 0, out = {} }
            local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function
            captop = captop - n
            local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result
            -- fail?
            if res == FAIL then
                fail()
            else
                s = res - 1 -- else update current position
                n = out.outindex - 1 -- number of new captures
                -- any new capture?
                if n > 0 then
                    captop = captop + 1
                    while captop + n + 1 >= maxcapture do
                        doublecapture()
                    end
                    captop = captop + n + 1
                    -- add new captures to 'capture' list
                    adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable)
                end
                p = p + 1
            end
        elseif code == ICloseCapture then
            local s1 = s + 1
            assert(captop > 0)
            -- if possible, turn capture into a full capture
            if CAPTURE[captop - 1].siz == 0 and
                    s1 - CAPTURE[captop - 1].s < 255 then
                CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1
                p = p + 1
            else
                CAPTURE[captop].siz = 1
                CAPTURE[captop].s = s + 1
                pushcapture()
            end
        elseif code == IOpenCapture then
            CAPTURE[captop].siz = 0
            CAPTURE[captop].s = s + 1
            pushcapture()
        elseif code == IFullCapture then
            CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size
            CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F)
            pushcapture()
            -- standard mode
        elseif o then
            if code == IAny then
                if s < len then
                    p = p + 1
                    s = s + 1
                else
                    fail()
                end
            elseif code == ITestAny then
                if s < len then
                    p = p + 1
                else
                    p = p + op.p[p].offset
                end
            elseif code == IChar then
                if s < len and ptr[s] == op.p[p].val then
                    p = p + 1
                    s = s + 1
                else
                    fail()
                end
            elseif code == ITestChar then
                if s < len and ptr[s] == op.p[p].val then
                    p = p + 1
                else
                    p = p + op.p[p].offset
                end
            elseif code == ISet then
                local c = ptr[s]
                local set = valuetable[op.p[p].val]
                if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
                    p = p + 1
                    s = s + 1
                else
                    fail()
                end
            elseif code == ITestSet then
                local c = ptr[s]
                local set = valuetable[op.p[p].val]
                if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
                    p = p + 1
                else
                    p = p + op.p[p].offset
                end
            elseif code == ISpan then
                while s < len do
                    local c = ptr[s]
                    local set = valuetable[op.p[p].val]
                    if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
                        break
                    end
                    s = s + 1
                end
                p = p + 1
            end
        else
            -- stream mode
            if code == IAny then
                if checkstreamlen(s) then
                    p = p + 1
                    s = s + 1
                else
                    fail()
                end
            elseif code == ITestAny then
                if checkstreamlen(s) then
                    p = p + 1
                else
                    p = p + op.p[p].offset
                end
            elseif code == IChar then
                if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
                    p = p + 1
                    s = s + 1
                else
                    fail()
                end
            elseif code == ITestChar then
                if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
                    p = p + 1
                else
                    p = p + op.p[p].offset
                end
            elseif code == ISet then
                local c = checkstreamlen(s) and getstreamchar(s)
                local set = valuetable[op.p[p].val]
                if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
                    p = p + 1
                    s = s + 1
                else
                    fail()
                end
            elseif code == ITestSet then
                local c = checkstreamlen(s) and getstreamchar(s)
                local set = valuetable[op.p[p].val]
                if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
                    p = p + 1
                else
                    p = p + op.p[p].offset
                end
            elseif code == ISpan then
                while checkstreamlen(s) do
                    local c = getstreamchar(s)
                    local set = valuetable[op.p[p].val]
                    if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
                        break
                    end
                    s = s + 1
                end
                p = p + 1
            end
        end
    end
end
|
|
|
|
-- Set the upper bound for the backtrack stack; values below INITBACK
-- are raised to INITBACK.
local function setmax(val)
    maxstack = val
    if val < INITBACK then maxstack = INITBACK end
end
|
|
|
|
-- Set how far behind the current position stream data must be kept.
-- The result is never smaller than MAXBEHINDPREDICATE or MAXOFF; a
-- missing 'val' counts as 0.
local function setmaxbehind(val)
    local requested = val or 0
    MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, requested)
end
|
|
|
|
-- Switch memoization of rule calls on (truthy 'val') or off.
local function enablememoization(val)
    usememoization = val
end
|
|
|
|
-- Switch the trace output written by the VM on (truthy 'val') or off.
local function enabletracing(val)
    trace = val
end
|
|
|
|
-- Get the initial position for the match, interpreting negative
|
|
-- values from the end of the subject
|
|
|
|
-- Convert the user-supplied start position into a 0-based offset.
--   len  length of the subject
--   pos  1-based position; zero or negative values count from the end;
--        nil means 1
-- The result is cropped to the range [0, len].
local function initposition(len, pos)
    local start = pos or 1
    if start > 0 then
        -- positive index: past the end -> crop at the end
        if start > len then
            return len
        end
        return start - 1 -- inside the string (corrected to 0-base)
    end
    -- zero or negative index: before the beginning -> crop at the beginning
    if -start > len then
        return 0
    end
    return len + start -- position from the end
end
|
|
|
|
-- Match pattern 'pat' against the whole string 's', starting at 'init'.
-- Returns whatever match() produces after its leading status value.
local function lp_match(pat, s, init, valuetable, ...)
    local start = initposition(s:len(), init)
    return select(2, match(false, true, s, start + 1, pat.code, valuetable, ...))
end
|
|
|
|
-- Create a stream matcher for 'pat'. The returned function is a wrapped
-- coroutine: call it as f(chunk, last) for every piece of input; each
-- call returns what match() yields or returns (a status value first).
local function lp_streammatch(pat, init, valuetable, ...)
    local nparams = select('#', ...)
    local params = { ... }
    return coroutine.wrap(function(s, last)
        return match(true, last, s, init or 1, pat.code, valuetable, unpack(params, 1, nparams))
    end)
end
|
|
|
|
-- Pack the arguments: returns their exact count (trailing nils included)
-- followed by a table holding them.
local function retcount(...)
    local packed = { ... }
    return select('#', ...), packed
end
|
|
|
|
-- Only for testing purpose
|
|
-- stream emulation (send all chars from string one char after char)
|
|
-- Testing aid: run a stream match over the string 's' by feeding it one
-- character at a time (the final character is flagged as last).
-- Returns nothing when the match fails, otherwise all values produced
-- by the intermediate yields followed by the final results.
local function lp_emulatestreammatch(pat, s, init, valuetable, ...)
    local start = initposition(s:len(), init) + 1
    local feed = lp_streammatch(pat, start, valuetable, ...)
    local collected, total = {}, 0
    for pos = 1, #s do
        local nvals, vals = retcount(feed(s:sub(pos, pos), pos == #s)) -- one char
        local status = vals[1]
        if status == -1 then
            return -- fail
        end
        -- vals[2..nvals] are the values produced by this step
        for k = 2, nvals do
            collected[total + k - 1] = vals[k]
        end
        total = total + nvals - 1
        if status == 0 then
            -- parsing finished
            return unpack(collected, 1, total)
        end
    end
    return select(2, feed(s, true)) -- empty string
end
|
|
|
|
-- Rebuild a pattern from its serialized form.
--   str      serialized pattern data
--   fcetab   optional table name -> function; a serialized function whose
--            name is present here is replaced by the table entry
--   usemeta  truthy: create the objects through the ctype constructors;
--            otherwise allocate them with malloc and attach a finalizer
-- Layout as read below: tree size (uint32), tree elements, code size
-- (uint32), code elements, value count (uint32), then tagged values
-- ('str', 'num', 'cdt', 'fnc').
-- Returns the pattern and its value table. Raises an error on truncated
-- or malformed data.
-- NOTE(review): functions missing from 'fcetab' are recreated with
-- loadstring(); do not load data from untrusted sources.
local function lp_load(str, fcetab, usemeta)
    assert(str)
    local ptr = ffi.cast('const char*', str)
    -- the length is only known (and checked) when the data is a Lua string
    local total = type(str) == 'string' and #str or math.huge
    local index = 0

    -- fail cleanly instead of reading past the end of the data
    local function need(n)
        if index + n > total then
            error("invalid or truncated pattern data", 0)
        end
    end

    local function readu32()
        need(4)
        local v = ffi.cast('const uint32_t*', ptr + index)[0]
        index = index + 4
        return v
    end

    local function readstring(n)
        need(n)
        local v = ffi.string(ptr + index, n)
        index = index + n
        return v
    end

    local patsize = readu32()
    local treebytes = ffi.sizeof(treepatternelement) * patsize

    local pat
    if usemeta then
        pat = treepattern(patsize)
    else
        local raw = ffi.C.malloc(ffi.sizeof(treepattern, patsize))
        assert(raw ~= nil, "not enough memory")
        pat = ffi.gc(ffi.cast('TREEPATTERN*', raw),
            function(ct)
                if ct.code ~= nil then
                    ffi.C.free(ct.code.p)
                    ffi.C.free(ct.code)
                end
                ffi.C.free(ct)
            end)
        -- zero the block so that 'code' is NULL until it is allocated
        ffi.fill(pat, ffi.sizeof(treepattern, patsize))
        pat.treesize = patsize
        pat.id = 0
    end
    need(treebytes)
    ffi.copy(pat.p, ptr + index, treebytes)
    index = index + treebytes

    if usemeta then
        pat.code = pattern()
    else
        pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
        assert(pat.code ~= nil)
        pat.code.allocsize = 10
        pat.code.size = 0
        pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize)
        assert(pat.code.p ~= nil)
        ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
    end
    pat.code.size = readu32()
    local data = readstring(pat.code.size * ffi.sizeof(patternelement))

    local count = readu32()
    local valuetable = {}
    for i = 1, count do
        local tag = readstring(3)
        if tag == 'str' then
            --string
            valuetable[#valuetable + 1] = readstring(readu32())
        elseif tag == 'num' then
            --number
            local val = tonumber(readstring(readu32()))
            -- a nil here would silently shift every later index
            assert(val ~= nil, "invalid number in pattern data")
            valuetable[#valuetable + 1] = val
        elseif tag == 'cdt' then
            --ctype
            local val = settype()
            need(ffi.sizeof(settype))
            ffi.copy(val, ptr + index, ffi.sizeof(settype))
            index = index + ffi.sizeof(settype)
            valuetable[#valuetable + 1] = val
        elseif tag == 'fnc' then
            --function
            local fname = readstring(readu32())
            local val = readstring(readu32())
            if fcetab and fcetab[fname] then
                assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname))
                valuetable[#valuetable + 1] = fcetab[fname]
            else
                local fn, err = loadstring(val)
                if not fn then
                    error(('cannot load function "%s": %s'):format(fname, tostring(err)), 0)
                end
                valuetable[#valuetable + 1] = fn
            end
        else
            error(('unknown value tag "%s" in pattern data'):format(tag), 0)
        end
    end

    pat.code.allocsize = pat.code.size
    -- keep the old pointer reachable if realloc fails
    local newp = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
    assert(newp ~= nil)
    pat.code.p = newp
    ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize)
    return pat, valuetable
end
|
|
|
|
-- Load a serialized pattern from the file 'fname' (see lp_load for
-- 'fcetab' and 'usemeta'). Returns the pattern and its value table.
-- Raises an error when the file cannot be opened or read.
local function lp_loadfile(fname, fcetab, usemeta)
    local file = assert(io.open(fname, 'rb'))
    local data, err = file:read("*a")
    -- close before decoding so the handle is released even if lp_load fails
    file:close()
    local pat, valuetable = lp_load(assert(data, err), fcetab, usemeta)
    return pat, valuetable
end
|
|
|
|
-- ======================================================
|
|
|
|
-- Public interface of the virtual machine module.
return {
    -- matching
    match = lp_match,
    streammatch = lp_streammatch,
    emulatestreammatch = lp_emulatestreammatch,
    -- loading of serialized patterns
    load = lp_load,
    loadfile = lp_loadfile,
    -- configuration
    setmax = setmax,
    setmaxbehind = setmaxbehind,
    enablememoization = enablememoization,
    enabletracing = enabletracing,
}
|