forked from public/swade-fr-content
356 lines
11 KiB
Lua
356 lines
11 KiB
Lua
--[[
|
|
LPEGLJ
|
|
lpprint.lua
|
|
Tree, code and debug print function (only for debuging)
|
|
Copyright (C) 2014 Rostislav Sacek.
|
|
based on LPeg v1.0 - PEG pattern matching for Lua
|
|
Lua.org & PUC-Rio written by Roberto Ierusalimschy
|
|
http://www.inf.puc-rio.br/~roberto/lpeg/
|
|
|
|
** Permission is hereby granted, free of charge, to any person obtaining
|
|
** a copy of this software and associated documentation files (the
|
|
** "Software"), to deal in the Software without restriction, including
|
|
** without limitation the rights to use, copy, modify, merge, publish,
|
|
** distribute, sublicense, and/or sell copies of the Software, and to
|
|
** permit persons to whom the Software is furnished to do so, subject to
|
|
** the following conditions:
|
|
**
|
|
** The above copyright notice and this permission notice shall be
|
|
** included in all copies or substantial portions of the Software.
|
|
**
|
|
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
**
|
|
** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
|
|
--]]
|
|
|
|
local ffi = require"ffi"
|
|
local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
|
|
|
|
ffi.cdef[[
|
|
int isprint ( int c );
|
|
]]
|
|
|
|
local RuleLR = 0x10000
|
|
local Ruleused = 0x20000
|
|
|
|
-- {======================================================
|
|
-- Printing patterns (for debugging)
|
|
-- =======================================================
|
|
|
|
local TChar = 0
|
|
local TSet = 1
|
|
local TAny = 2 -- standard PEG elements
|
|
local TTrue = 3
|
|
local TFalse = 4
|
|
local TRep = 5
|
|
local TSeq = 6
|
|
local TChoice = 7
|
|
local TNot = 8
|
|
local TAnd = 9
|
|
local TCall = 10
|
|
local TOpenCall = 11
|
|
local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
|
|
local TGrammar = 13 -- sib1 is initial (and first) rule
|
|
local TBehind = 14 -- match behind
|
|
local TCapture = 15 -- regular capture
|
|
local TRunTime = 16 -- run-time capture
|
|
|
|
local IAny = 0 -- if no char, fail
|
|
local IChar = 1 -- if char != aux, fail
|
|
local ISet = 2 -- if char not in val, fail
|
|
local ITestAny = 3 -- in no char, jump to 'offset'
|
|
local ITestChar = 4 -- if char != aux, jump to 'offset'
|
|
local ITestSet = 5 -- if char not in val, jump to 'offset'
|
|
local ISpan = 6 -- read a span of chars in val
|
|
local IBehind = 7 -- walk back 'aux' characters (fail if not possible)
|
|
local IRet = 8 -- return from a rule
|
|
local IEnd = 9 -- end of pattern
|
|
local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
|
|
local IJmp = 11 -- jump to 'offset'
|
|
local ICall = 12 -- call rule at 'offset'
|
|
local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
|
|
local ICommit = 14 -- pop choice and jump to 'offset'
|
|
local IPartialCommit = 15 -- update top choice to current position and jump
|
|
local IBackCommit = 16 -- "fails" but jump to its own 'offset'
|
|
local IFailTwice = 17 -- pop one choice and then fail
|
|
local IFail = 18 -- go back to saved state on choice and jump to saved offset
|
|
local IGiveup = 19 -- internal use
|
|
local IFullCapture = 20 -- complete capture of last 'off' chars
|
|
local IOpenCapture = 21 -- start a capture
|
|
local ICloseCapture = 22
|
|
local ICloseRunTime = 23
|
|
|
|
local Cclose = 0
|
|
local Cposition = 1
|
|
local Cconst = 2
|
|
local Cbackref = 3
|
|
local Carg = 4
|
|
local Csimple = 5
|
|
local Ctable = 6
|
|
local Cfunction = 7
|
|
local Cquery = 8
|
|
local Cstring = 9
|
|
local Cnum = 10
|
|
local Csubst = 11
|
|
local Cfold = 12
|
|
local Cruntime = 13
|
|
local Cgroup = 14
|
|
|
|
|
|
-- number of siblings for each tree
|
|
local numsiblings = {
|
|
[TRep] = 1,
|
|
[TSeq] = 2,
|
|
[TChoice] = 2,
|
|
[TNot] = 1,
|
|
[TAnd] = 1,
|
|
[TRule] = 2,
|
|
[TGrammar] = 1,
|
|
[TBehind] = 1,
|
|
[TCapture] = 1,
|
|
[TRunTime] = 1,
|
|
}
|
|
local names = {
|
|
[IAny] = "any",
|
|
[IChar] = "char",
|
|
[ISet] = "set",
|
|
[ITestAny] = "testany",
|
|
[ITestChar] = "testchar",
|
|
[ITestSet] = "testset",
|
|
[ISpan] = "span",
|
|
[IBehind] = "behind",
|
|
[IRet] = "ret",
|
|
[IEnd] = "end",
|
|
[IChoice] = "choice",
|
|
[IJmp] = "jmp",
|
|
[ICall] = "call",
|
|
[IOpenCall] = "open_call",
|
|
[ICommit] = "commit",
|
|
[IPartialCommit] = "partial_commit",
|
|
[IBackCommit] = "back_commit",
|
|
[IFailTwice] = "failtwice",
|
|
[IFail] = "fail",
|
|
[IGiveup] = "giveup",
|
|
[IFullCapture] = "fullcapture",
|
|
[IOpenCapture] = "opencapture",
|
|
[ICloseCapture] = "closecapture",
|
|
[ICloseRunTime] = "closeruntime"
|
|
}
|
|
|
|
local function printcharset(st)
|
|
io.write("[");
|
|
local i = 0
|
|
while i <= 255 do
|
|
local first = i;
|
|
while band(st[rshift(i, 5)], lshift(1, band(i, 31))) ~= 0 and i <= 255 do
|
|
i = i + 1
|
|
end
|
|
if i - 1 == first then -- unary range?
|
|
io.write(("(%02x)"):format(first))
|
|
elseif i - 1 > first then -- non-empty range?
|
|
io.write(("(%02x-%02x)"):format(first, i - 1))
|
|
end
|
|
i = i + 1
|
|
end
|
|
io.write("]")
|
|
end
|
|
|
|
local modes = {
|
|
[Cclose] = "close",
|
|
[Cposition] = "position",
|
|
[Cconst] = "constant",
|
|
[Cbackref] = "backref",
|
|
[Carg] = "argument",
|
|
[Csimple] = "simple",
|
|
[Ctable] = "table",
|
|
[Cfunction] = "function",
|
|
[Cquery] = "query",
|
|
[Cstring] = "string",
|
|
[Cnum] = "num",
|
|
[Csubst] = "substitution",
|
|
[Cfold] = "fold",
|
|
[Cruntime] = "runtime",
|
|
[Cgroup] = "group"
|
|
}
|
|
|
|
local function printcapkind(kind)
|
|
io.write(("%s"):format(modes[kind]))
|
|
end
|
|
|
|
local function printjmp(p, index)
|
|
io.write(("-> %d"):format(index + p[index].offset))
|
|
end
|
|
|
|
local function printrulename(p, index, rulenames)
|
|
if rulenames and rulenames[index + p[index].offset] then
|
|
io.write(' ', rulenames[index + p[index].offset])
|
|
end
|
|
end
|
|
|
|
local function printinst(p, index, valuetable, rulenames)
|
|
local code = p[index].code
|
|
if rulenames and rulenames[index] then
|
|
io.write(rulenames[index], '\n')
|
|
end
|
|
io.write(("%04d: %s "):format(index, names[code]))
|
|
if code == IChar then
|
|
io.write(("'%s'"):format(string.char(p[index].val)))
|
|
elseif code == ITestChar then
|
|
io.write(("'%s'"):format(string.char(p[index].val)))
|
|
printjmp(p, index)
|
|
printrulename(p, index, rulenames)
|
|
elseif code == IFullCapture then
|
|
printcapkind(band(p[index].val, 0x0f));
|
|
io.write((" (size = %d) (idx = %s)"):format(band(rshift(p[index].val, 4), 0xF), tostring(valuetable[p[index].offset])))
|
|
elseif code == IOpenCapture then
|
|
printcapkind(band(p[index].val, 0x0f))
|
|
io.write((" (idx = %s)"):format(tostring(valuetable[p[index].offset])))
|
|
elseif code == ISet then
|
|
printcharset(valuetable[p[index].val]);
|
|
elseif code == ITestSet then
|
|
printcharset(valuetable[p[index].val])
|
|
printjmp(p, index);
|
|
printrulename(p, index, rulenames)
|
|
elseif code == ISpan then
|
|
printcharset(valuetable[p[index].val]);
|
|
elseif code == IOpenCall then
|
|
io.write(("-> %d"):format(p[index].offset))
|
|
elseif code == IBehind then
|
|
io.write(("%d"):format(p[index].val))
|
|
elseif code == IJmp or code == ICall or code == ICommit or code == IChoice or
|
|
code == IPartialCommit or code == IBackCommit or code == ITestAny then
|
|
printjmp(p, index);
|
|
if (code == ICall or code == IJmp) and p[index].aux > 0 then
|
|
io.write(' ', valuetable[p[index].aux])
|
|
else
|
|
printrulename(p, index, rulenames)
|
|
end
|
|
end
|
|
io.write("\n")
|
|
end
|
|
|
|
|
|
local function printpatt(p, valuetable)
|
|
local ruleNames = {}
|
|
for i = 0, p.size - 1 do
|
|
local code = p.p[i].code
|
|
if (code == ICall or code == IJmp) and p.p[i].aux > 0 then
|
|
local index = i + p.p[i].offset
|
|
ruleNames[index] = valuetable[p.p[i].aux]
|
|
end
|
|
end
|
|
for i = 0, p.size - 1 do
|
|
printinst(p.p, i, valuetable, ruleNames)
|
|
end
|
|
end
|
|
|
|
|
|
local function printcap(cap, index, valuetable)
|
|
printcapkind(cap[index].kind)
|
|
io.write((" (idx: %s - size: %d) -> %d\n"):format(valuetable[cap[index].idx], cap[index].siz, cap[index].s))
|
|
end
|
|
|
|
|
|
local function printcaplist(cap, limit, valuetable)
|
|
io.write(">======\n")
|
|
local index = 0
|
|
while cap[index].s and index < limit do
|
|
printcap(cap, index, valuetable)
|
|
index = index + 1
|
|
end
|
|
io.write("=======\n")
|
|
end
|
|
|
|
-- ======================================================
|
|
|
|
|
|
|
|
-- {======================================================
|
|
-- Printing trees (for debugging)
|
|
-- =======================================================
|
|
|
|
local tagnames = {
|
|
[TChar] = "char",
|
|
[TSet] = "set",
|
|
[TAny] = "any",
|
|
[TTrue] = "true",
|
|
[TFalse] = "false",
|
|
[TRep] = "rep",
|
|
[TSeq] = "seq",
|
|
[TChoice] = "choice",
|
|
[TNot] = "not",
|
|
[TAnd] = "and",
|
|
[TCall] = "call",
|
|
[TOpenCall] = "opencall",
|
|
[TRule] = "rule",
|
|
[TGrammar] = "grammar",
|
|
[TBehind] = "behind",
|
|
[TCapture] = "capture",
|
|
[TRunTime] = "run-time"
|
|
}
|
|
|
|
|
|
local function printtree(tree, ident, index, valuetable)
|
|
for i = 1, ident do
|
|
io.write(" ")
|
|
end
|
|
local tag = tree[index].tag
|
|
io.write(("%s"):format(tagnames[tag]))
|
|
if tag == TChar then
|
|
local c = tree[index].val
|
|
if ffi.C.isprint(c) then
|
|
io.write((" '%c'\n"):format(c))
|
|
else
|
|
io.write((" (%02X)\n"):format(c))
|
|
end
|
|
elseif tag == TSet then
|
|
printcharset(valuetable[tree[index].val]);
|
|
io.write("\n")
|
|
elseif tag == TOpenCall or tag == TCall then
|
|
io.write((" key: %s\n"):format(tostring(valuetable[tree[index].val])))
|
|
elseif tag == TBehind then
|
|
io.write((" %d\n"):format(tree[index].val))
|
|
printtree(tree, ident + 2, index + 1, valuetable);
|
|
elseif tag == TCapture then
|
|
io.write((" cap: %s n: %s\n"):format(modes[bit.band(tree[index].cap, 0xffff)], valuetable[tree[index].val]))
|
|
printtree(tree, ident + 2, index + 1, valuetable);
|
|
elseif tag == TRule then
|
|
local extra = bit.band(tree[index].cap, RuleLR) == RuleLR and ' left recursive' or ''
|
|
extra = extra .. (bit.band(tree[index].cap, Ruleused) ~= Ruleused and ' not used' or '')
|
|
io.write((" n: %d key: %s%s\n"):format(bit.band(tree[index].cap, 0xffff) - 1, valuetable[tree[index].val], extra))
|
|
printtree(tree, ident + 2, index + 1, valuetable);
|
|
-- do not print next rule as a sibling
|
|
elseif tag == TGrammar then
|
|
local ruleindex = index + 1
|
|
io.write((" %d\n"):format(tree[index].val)) -- number of rules
|
|
for i = 1, tree[index].val do
|
|
printtree(tree, ident + 2, ruleindex, valuetable);
|
|
ruleindex = ruleindex + tree[ruleindex].ps
|
|
end
|
|
assert(tree[ruleindex].tag == TTrue); -- sentinel
|
|
else
|
|
local sibs = numsiblings[tree[index].tag] or 0
|
|
io.write("\n")
|
|
if sibs >= 1 then
|
|
printtree(tree, ident + 2, index + 1, valuetable);
|
|
if sibs >= 2 then
|
|
printtree(tree, ident + 2, index + tree[index].ps, valuetable)
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
-- }====================================================== */
|
|
|
|
return {
|
|
printtree = printtree,
|
|
printpatt = printpatt,
|
|
printcaplist = printcaplist,
|
|
printinst = printinst
|
|
} |