1374 lines
41 KiB
Lua
1374 lines
41 KiB
Lua
|
--[[
|
||
|
LPEGLJ
|
||
|
lpeglj.lua
|
||
|
Main module and tree generation
|
||
|
Copyright (C) 2014 Rostislav Sacek.
|
||
|
based on LPeg v1.0 - PEG pattern matching for Lua
|
||
|
Lua.org & PUC-Rio written by Roberto Ierusalimschy
|
||
|
http://www.inf.puc-rio.br/~roberto/lpeg/
|
||
|
|
||
|
** Permission is hereby granted, free of charge, to any person obtaining
|
||
|
** a copy of this software and associated documentation files (the
|
||
|
** "Software"), to deal in the Software without restriction, including
|
||
|
** without limitation the rights to use, copy, modify, merge, publish,
|
||
|
** distribute, sublicense, and/or sell copies of the Software, and to
|
||
|
** permit persons to whom the Software is furnished to do so, subject to
|
||
|
** the following conditions:
|
||
|
**
|
||
|
** The above copyright notice and this permission notice shall be
|
||
|
** included in all copies or substantial portions of the Software.
|
||
|
**
|
||
|
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||
|
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||
|
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||
|
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||
|
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||
|
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||
|
** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||
|
**
|
||
|
** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
|
||
|
--]]
|
||
|
|
||
|
-- This module is built on the LuaJIT FFI.  Check that 'jit' exists before
-- reading its version so that running under another Lua interpreter reports
-- the intended message instead of "attempt to index global 'jit'".
assert(type(jit) == 'table' and jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.")
|
||
|
|
||
|
local ffi = require "ffi"
|
||
|
local lpcode = require "lpcode"
|
||
|
local lpprint = require "lpprint"
|
||
|
local lpvm = require "lpvm"
|
||
|
|
||
|
local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift
|
||
|
|
||
|
-- Declarations of the C character-classification predicates, made
-- available to the FFI.
ffi.cdef [[
    int isalnum(int c);
    int isalpha(int c);
    int iscntrl(int c);
    int isdigit(int c);
    int isgraph(int c);
    int islower(int c);
    int isprint(int c);
    int ispunct(int c);
    int isspace(int c);
    int isupper(int c);
    int isxdigit(int c);
]]
|
||
|
|
||
|
-- longest fixed length accepted by the look-behind predicate (lp_behind)
local MAXBEHIND = 255
-- verifyrule stops following a rule once it was entered more than this often
local MAXRULES = 200
local VERSION = "1.0.0.0LJ"
|
||
|
|
||
|
-- Tags of tree nodes.  The numeric values double as (0-based) indices into
-- 'numsiblings', so they must stay consecutive.

-- standard PEG elements
local TChar = 0
local TSet = 1
local TAny = 2
local TTrue = 3
local TFalse = 4
local TRep = 5
local TSeq = 6
local TChoice = 7
local TNot = 8
local TAnd = 9
local TCall = 10
local TOpenCall = 11
local TRule = 12     -- sib1 is rule's pattern, sib2 is 'next' rule
local TGrammar = 13  -- sib1 is initial (and first) rule
local TBehind = 14   -- match behind
local TCapture = 15  -- regular capture
local TRunTime = 16  -- run-time capture
|
||
|
|
||
|
-- Opcodes of the matching machine.
local IAny = 0             -- if no char, fail
local IChar = 1            -- if char != val, fail
local ISet = 2             -- if char not in val, fail
local ITestAny = 3         -- if no char, jump to 'offset'
local ITestChar = 4        -- if char != val, jump to 'offset'
local ITestSet = 5         -- if char not in val, jump to 'offset'
local ISpan = 6            -- read a span of chars in val
local IBehind = 7          -- walk back 'val' characters (fail if not possible)
local IRet = 8             -- return from a rule
local IEnd = 9             -- end of pattern
local IChoice = 10         -- stack a choice; next fail will jump to 'offset'
local IJmp = 11            -- jump to 'offset'
local ICall = 12           -- call rule at 'offset'
local IOpenCall = 13       -- call rule number 'offset' (must be closed to a ICall)
local ICommit = 14         -- pop choice and jump to 'offset'
local IPartialCommit = 15  -- update top choice to current position and jump
local IBackCommit = 16     -- "fails" but jump to its own 'offset'
local IFailTwice = 17      -- pop one choice and then fail
local IFail = 18           -- go back to saved state on choice and jump to saved offset
local IGiveup = 19         -- internal use
local IFullCapture = 20    -- complete capture of last 'off' chars
local IOpenCapture = 21    -- start a capture
local ICloseCapture = 22
local ICloseRunTime = 23
|
||
|
|
||
|
-- Kinds of captures (stored in the 'cap' field of TCapture nodes).
local Cclose = 0
local Cposition = 1
local Cconst = 2
local Cbackref = 3
local Carg = 4
local Csimple = 5
local Ctable = 6
local Cfunction = 7
local Cquery = 8
local Cstring = 9
local Cnum = 10
local Csubst = 11
local Cfold = 12
local Cruntime = 13
local Cgroup = 14
|
||
|
|
||
|
-- Pattern properties.  PEnullable and PEnofail are passed to
-- lpcode.checkaux; PEleftrecursion is the marker returned by verifyrule.
local PEnullable = 0
local PEnofail = 1
local PEleftrecursion = 2

-- forward declaration: getpatt calls newgrammar, which is defined further down
local newgrammar

-- flags or-ed into the 'cap' field of rule nodes
local RuleLR = 0x10000    -- set by verifygrammar on left-recursive rules
local Ruleused = 0x20000  -- set on the initial rule and on every called rule
local BCapcandelete = 0x30000

-- while false, verifygrammar raises an error for left-recursive rules
local LREnable = false
|
||
|
|
||
|
-- Number of siblings for each kind of tree node; index with 'tag + 1'
-- because tags start at 0 while Lua arrays start at 1.
local numsiblings = {
    0, -- char
    0, -- set
    0, -- any
    0, -- true
    0, -- false
    1, -- rep
    2, -- seq
    2, -- choice
    1, -- not
    1, -- and
    0, -- call
    0, -- opencall
    2, -- rule
    1, -- grammar
    1, -- behind
    1, -- capture
    1, -- runtime capture
}
|
||
|
|
||
|
|
||
|
|
||
|
local patternid = 0
-- maps a pattern id to the array of Lua values that pattern's nodes refer to
local valuetable = {}

-- optional display names of functions; weak keys so that a name does not
-- keep its function alive
local funcnames = setmetatable({}, { __mode = 'k' })

-- ctypes used in this file (the named C types are declared elsewhere)
local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
local treepattern = ffi.typeof('TREEPATTERN')
local patternelement = ffi.typeof('PATTERN_ELEMENT')
local pattern = ffi.typeof('PATTERN')
local settype = ffi.typeof('int32_t[8]') -- charset: 8 x 32 bits = 256 chars
local uint32 = ffi.typeof('uint32_t[1]')
|
||
|
|
||
|
-- Fix a TOpenCall into a TCall node, using table 'postable' to
-- translate a key to its rule address in the tree, and mark the called
-- rule as used.
--   postable   rule name -> index of that rule's node in 'grammar'
--   grammar    tree being fixed
--   index      index of the TOpenCall node
--   valuetable value table of 'grammar' (the node's 'val' indexes it)
-- Raises an error (without position information) if the rule does not exist.
local function fixonecall(postable, grammar, index, valuetable)
    local name = valuetable[grammar.p[index].val] -- get rule's name
    local pos = postable[name] -- query name in position table
    if not pos then
        -- Rule names may be any Lua value, but '%s' only accepts strings
        -- and numbers on LuaJIT 2.0; convert everything else explicitly so
        -- the real problem is reported instead of a 'format' argument error.
        local shown
        local typ = type(name)
        if typ == 'table' then
            shown = '(a table)'
        elseif typ == 'string' or typ == 'number' then
            shown = name
        else
            shown = tostring(name)
        end
        error(("rule '%s' undefined in given grammar"):format(shown), 0)
    end
    grammar.p[index].tag = TCall
    grammar.p[index].ps = pos - index -- position relative to node
    local target = index + grammar.p[index].ps
    grammar.p[target].cap = bor(grammar.p[target].cap, Ruleused)
end
|
||
|
|
||
|
|
||
|
-- Rewrite left-nested sequences/choices rooted at 'index' into
-- right-nested ones, in place:
--   (t11 + t12) + t2  =>  t11 + (t12 + t2)
--   (t11 * t12) * t2  =>  t11 * (t12 * t2)
-- i.e. Op (Op t11 t12) t2  =>  Op t11 (Op t12 t2).
-- The rotation is repeated until the first sibling no longer has the same
-- operator as the root.
local function correctassociativity(tree, index)
    local first = index + 1
    local elemsize = ffi.sizeof(treepatternelement)
    assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq)
    while tree.p[first].tag == tree.p[index].tag do
        local leftsize = tree.p[index].ps - 1 -- size of first sibling (Op t11 t12)
        local t11size = tree.p[first].ps - 1
        local t12size = leftsize - t11size - 1
        -- Move t11 one slot down, over the inner operator node.  Source and
        -- destination overlap, hence one element at a time in ascending order.
        for k = 1, t11size do
            ffi.copy(tree.p + index + k, tree.p + first + k, elemsize)
        end
        tree.p[index].ps = t11size + 1
        -- the slot right after t11 becomes the new inner operator node
        local inner = index + tree.p[index].ps
        tree.p[inner].tag = tree.p[index].tag
        tree.p[inner].ps = t12size + 1
    end
end
|
||
|
|
||
|
|
||
|
-- Final adjustments of the (sub)tree at 'index'.  Open calls are bound to
-- their rules when 'fix' is true (inside a grammar) and raise an error
-- otherwise; sequences and choices are made right associative.  The walk
-- then continues into the node's siblings.
local function finalfix(fix, postable, grammar, index, valuetable)
    local tag = grammar.p[index].tag
    if tag == TGrammar then
        return -- subgrammars were already fixed
    end

    if tag == TOpenCall then
        if not fix then
            -- open call outside grammar
            error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0)
        end
        fixonecall(postable, grammar, index, valuetable)
    elseif tag == TSeq or tag == TChoice then
        correctassociativity(grammar, index)
    end

    local ns = numsiblings[tag + 1]
    if ns == 1 then
        return finalfix(fix, postable, grammar, index + 1, valuetable)
    elseif ns == 2 then
        finalfix(fix, postable, grammar, index + 1, valuetable)
        return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable)
    end
    assert(ns == 0) -- any other sibling count means a corrupt tag
end
|
||
|
|
||
|
|
||
|
-- {======================================================
|
||
|
-- Tree generation
|
||
|
-- =======================================================
|
||
|
|
||
|
-- Create a one-node TSet tree together with a fresh character set.
-- The set is stored as value 1 of the new tree.
-- Returns the tree and the set.
local function newcharset()
    local tree = treepattern(1)
    local set = settype()
    valuetable[tree.id] = { set }
    local root = tree.p[0]
    root.tag = TSet
    root.val = 1
    return tree, set
end
|
||
|
|
||
|
|
||
|
-- Write, at position 'start' of 'tree', a TSeq node whose first sibling is
-- a copy of the 'sibsize' nodes of 'sib'.  The second sibling is left for
-- the caller to fill in.
local function seqaux(tree, sib, start, sibsize)
    local nbytes = ffi.sizeof(treepatternelement) * sibsize
    tree.p[start].tag = TSeq
    tree.p[start].ps = sibsize + 1
    ffi.copy(tree.p + start + 1, sib.p, nbytes)
end
|
||
|
|
||
|
|
||
|
-- Build, from position 'start', a sequence of 'n' nodes with tag 'tag'.
-- When the string 's' is given, the i-th node gets the i-th byte of 's' as
-- its 'val'.  TSeq is binary, so the result is a sequence of a sequence of
-- a sequence...
local function fillseq(tree, tag, start, n, s)
    local pos = start
    -- the first n-1 elements are each wrapped in a TSeq node
    for i = 1, n - 1 do
        tree.p[pos].tag = TSeq
        tree.p[pos].ps = 2
        local leaf = pos + 1
        tree.p[leaf].tag = tag
        if s then
            tree.p[leaf].val = s:byte(i)
        end
        pos = pos + tree.p[pos].ps
    end
    -- last one does not need TSeq
    tree.p[pos].tag = tag
    if s then
        tree.p[pos].val = s:byte(n)
    end
end
|
||
|
|
||
|
|
||
|
-- Numbers as patterns:
--    0  => TTrue (always matches)
--    n  => TAny repeated 'n' times
--   -n  => TNot over TAny repeated 'n' times
local function numtree(n)
    if n == 0 then
        local always = treepattern(1)
        always.p[0].tag = TTrue
        return always
    end

    local tree
    local first = 0
    if n > 0 then
        tree = treepattern(2 * n - 1)
    else
        -- negative: code it as !(-n)
        n = -n
        tree = treepattern(2 * n)
        tree.p[0].tag = TNot
        first = 1
    end
    fillseq(tree, TAny, first, n) -- sequence of 'n' any's
    return tree
end
|
||
|
|
||
|
|
||
|
-- Convert a Lua value to a pattern tree:
--   string   => sequence of its characters (TTrue when empty)
--   number   => see numtree
--   boolean  => TTrue / TFalse
--   table    => grammar
--   function => run-time capture over TTrue; when 'name' is a string it is
--               recorded in 'funcnames' for that function
--   pattern  => returned unchanged
-- Any other value fails an assertion.
local function getpatt(val, name)
    local typ = type(val)

    if typ == 'number' then
        return numtree(val)
    end

    if typ == 'table' then
        return newgrammar(val)
    end

    if typ == 'string' then
        local len = #val
        local tree
        if len == 0 then
            tree = treepattern(1)
            tree.p[0].tag = TTrue -- always match
        else
            tree = treepattern(2 * (len - 1) + 1)
            fillseq(tree, TChar, 0, len, val) -- sequence of 'len' chars
        end
        return tree
    end

    if typ == 'boolean' then
        local tree = treepattern(1)
        if val then
            tree.p[0].tag = TTrue
        else
            tree.p[0].tag = TFalse
        end
        return tree
    end

    if typ == 'function' then
        if type(name) == 'string' then
            funcnames[val] = name
        end
        local tree = treepattern(2)
        valuetable[tree.id] = { val }
        tree.p[0].tag = TRunTime
        tree.p[0].val = 1
        tree.p[1].tag = TTrue
        return tree
    end

    if ffi.istype(treepattern, val) then
        assert(val.treesize > 0)
        return val
    end

    assert(false)
end
|
||
|
|
||
|
-- Concatenate two value tables (either may be nil) into a new table.
-- Returns the new table and the offset to add to indices that referred to
-- 'ktable2'.  The offset is 0 when 'ktable2' is missing, because nothing
-- needs renumbering then.
local function copykeys(ktable1, ktable2)
    local merged = {}
    local shift = 0
    if ktable1 then
        shift = #ktable1
        for i = 1, shift do
            merged[#merged + 1] = ktable1[i]
        end
    end
    if not ktable2 then
        return merged, 0
    end
    for i = 1, #ktable2 do
        merged[#merged + 1] = ktable2[i]
    end
    assert(#merged < 65536, "too many Lua values in pattern")
    return merged, shift
end
|
||
|
|
||
|
-- Add 'offset' to every non-zero value-table index stored in the subtree
-- at 'index'.  Used after two value tables were concatenated.
local function correctkeys(tree, index, offset)
    local tag = tree.p[index].tag
    local haskey = tag == TSet or tag == TRule or tag == TCall
        or tag == TRunTime or tag == TOpenCall or tag == TCapture
    if haskey and tree.p[index].val ~= 0 then
        tree.p[index].val = tree.p[index].val + offset
    end

    local ns = numsiblings[tag + 1]
    if ns == 1 then
        return correctkeys(tree, index + 1, offset)
    elseif ns == 2 then
        correctkeys(tree, index + 1, offset)
        return correctkeys(tree, index + tree.p[index].ps, offset)
    end
    assert(ns == 0)
end
|
||
|
|
||
|
|
||
|
|
||
|
-- Create a new tree whose root has tag 'tag' and whose only sibling is a
-- copy of the pattern obtained from 'pat'.  The value table is copied too.
local function newroot1sib(tag, pat)
    local child = getpatt(pat)
    local childsize = child.treesize
    local tree = treepattern(1 + childsize) -- create new tree
    local keys = copykeys(valuetable[child.id])
    valuetable[tree.id] = keys
    tree.p[0].tag = tag
    ffi.copy(tree.p + 1, child.p, ffi.sizeof(treepatternelement) * childsize)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Create a new tree whose root has tag 'tag' and whose two siblings are
-- copies of the patterns obtained from 'pat1' and 'pat2'.  The value tables
-- are concatenated and the indices of the second sibling shifted to match.
local function newroot2sib(tag, pat1, pat2)
    local left = getpatt(pat1)
    local right = getpatt(pat2)
    local leftsize, rightsize = left.treesize, right.treesize
    local elemsize = ffi.sizeof(treepatternelement)

    local tree = treepattern(1 + leftsize + rightsize) -- create new tree
    local keys, shift = copykeys(valuetable[left.id], valuetable[right.id])
    valuetable[tree.id] = keys

    local secondat = 1 + leftsize -- where the second sibling starts
    tree.p[0].tag = tag
    tree.p[0].ps = secondat
    ffi.copy(tree.p + 1, left.p, elemsize * leftsize)
    ffi.copy(tree.p + secondat, right.p, elemsize * rightsize)
    if shift > 0 then
        correctkeys(tree, secondat, shift)
    end
    return tree
end
|
||
|
|
||
|
|
||
|
-- Convert 'val' to a pattern (see getpatt); nil is rejected.
-- 'name' is forwarded to getpatt.
local function lp_P(val, name)
    local valtype = type(val)
    assert(valtype ~= 'nil')
    return getpatt(val, name)
end
|
||
|
|
||
|
|
||
|
-- Sequence operator, with the optimizations
--   false x => false,  x true => x,  true x => x
-- ('x false => false' is not applied because x may have run-time captures).
local function lp_seq(pat1, pat2)
    local first = getpatt(pat1)
    local second = getpatt(pat2)
    local tag1 = first.p[0].tag
    if tag1 == TFalse or second.p[0].tag == TTrue then
        return first -- false . x == false, x . true = x
    end
    if tag1 == TTrue then
        return second -- true . x = x
    end
    return newroot2sib(TSeq, first, second)
end
|
||
|
|
||
|
|
||
|
-- Choice operator, with the optimizations
--   charset / charset => charset
--   true / x => true,  x / false => x,  false / x => x
-- ('x / true' is not equivalent to true).
local function lp_choice(pat1, pat2)
    local first = getpatt(pat1)
    local second = getpatt(pat2)
    local cs1 = lpcode.tocharset(first, 0, valuetable[first.id])
    local cs2 = lpcode.tocharset(second, 0, valuetable[second.id])

    if cs1 and cs2 then
        -- both are character sets: their union is a single set
        local union, bits = newcharset()
        for w = 0, 7 do
            bits[w] = bor(cs1[w], cs2[w])
        end
        return union
    end

    if lpcode.checkaux(first, PEnofail, 0) or second.p[0].tag == TFalse then
        return first -- true / x => true, x / false => x
    end
    if first.p[0].tag == TFalse then
        return second -- false / x => x
    end
    return newroot2sib(TChoice, first, second)
end
|
||
|
|
||
|
|
||
|
-- p^n
--   n >= 0: at least n repetitions, built as
--           seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
--   n <  0: at most -n repetitions, built as
--           choice (seq tree1 ... choice tree1 true ...) true
-- 'n' must be convertible to a number.  For n >= 0 an error is raised when
-- 'tree1' may match the empty string.
local function lp_star(tree1, n)
    n = tonumber(n)
    assert(type(n) == 'number')
    local size1 = tree1.treesize
    local elemsize = ffi.sizeof(treepatternelement)
    local tree

    if n >= 0 then
        -- Validate before allocating: a rejected loop body should not cost
        -- a tree allocation.
        if lpcode.checkaux(tree1, PEnullable, 0) then
            error("loop body may accept empty string", 0)
        end
        tree = treepattern((n + 1) * (size1 + 1))
        valuetable[tree.id] = copykeys(valuetable[tree1.id])
        local start = 0
        -- repeat 'n' times
        for _ = 1, n do
            seqaux(tree, tree1, start, size1)
            start = start + tree.p[start].ps
        end
        tree.p[start].tag = TRep
        ffi.copy(tree.p + start + 1, tree1.p, elemsize * size1)
    else
        n = -n
        -- size = (choice + seq + tree1 + true) * n, but the last has no seq
        tree = treepattern(n * (size1 + 3) - 1)
        valuetable[tree.id] = copykeys(valuetable[tree1.id])
        local start = 0
        -- repeat (n - 1) times
        for i = n, 2, -1 do
            tree.p[start].tag = TChoice
            tree.p[start].ps = i * (size1 + 3) - 2
            tree.p[start + tree.p[start].ps].tag = TTrue
            start = start + 1
            seqaux(tree, tree1, start, size1)
            start = start + tree.p[start].ps
        end
        -- innermost: choice tree1 true
        tree.p[start].tag = TChoice
        tree.p[start].ps = size1 + 1
        tree.p[start + tree.p[start].ps].tag = TTrue
        ffi.copy(tree.p + start + 1, tree1.p, elemsize * size1)
    end
    return tree
end
|
||
|
|
||
|
|
||
|
-- #p == &p : and-predicate, a TAnd root over the pattern
local function lp_and(pat)
    local tree = newroot1sib(TAnd, pat)
    return tree
end
|
||
|
|
||
|
|
||
|
-- -p == !p : not-predicate, a TNot root over the pattern
local function lp_not(pat)
    local tree = newroot1sib(TNot, pat)
    return tree
end
|
||
|
|
||
|
|
||
|
-- [t1 - t2] == Seq (Not t2) t1
-- If t1 and t2 are charsets, make their difference instead.
local function lp_sub(pat1, pat2)
    local minuend = getpatt(pat1)
    local subtrahend = getpatt(pat2)
    local cs1 = lpcode.tocharset(minuend, 0, valuetable[minuend.id])
    local cs2 = lpcode.tocharset(subtrahend, 0, valuetable[subtrahend.id])

    if cs1 and cs2 then
        local diff, bits = newcharset()
        for w = 0, 7 do
            bits[w] = band(cs1[w], bnot(cs2[w]))
        end
        return diff
    end

    local size1, size2 = minuend.treesize, subtrahend.treesize
    local elemsize = ffi.sizeof(treepatternelement)
    local tree = treepattern(2 + size1 + size2)
    -- t2 comes first in the new tree, so its values come first as well
    local keys, shift = copykeys(valuetable[subtrahend.id], valuetable[minuend.id])
    valuetable[tree.id] = keys

    local t1at = 2 + size2 -- position of the copy of t1
    tree.p[0].tag = TSeq -- sequence of...
    tree.p[0].ps = t1at
    tree.p[1].tag = TNot -- ...not...
    ffi.copy(tree.p + 2, subtrahend.p, elemsize * size2)
    ffi.copy(tree.p + size2 + 2, minuend.p, elemsize * size1)
    if shift > 0 then
        correctkeys(tree, t1at, shift)
    end
    return tree
end
|
||
|
|
||
|
|
||
|
-- Create a charset pattern containing every byte of the string 'val'.
local function lp_set(val)
    assert(type(val) == 'string')
    local tree, set = newcharset()
    for i = 1, #val do
        local b = val:byte(i)
        local word = rshift(b, 5)           -- which 32-bit word
        local mask = lshift(1, band(b, 31)) -- which bit inside it
        set[word] = bor(set[word], mask)
    end
    return tree
end
|
||
|
|
||
|
|
||
|
-- Create a charset pattern from any number of two-character strings; each
-- string "xy" adds every byte from x to y (inclusive) to the set.
-- Raises "<arg> range must have two characters" for an argument that is not
-- a string of length 2.
local function lp_range(...)
    -- select('#', ...) gives the real argument count even when some
    -- arguments are nil ('#' on a table with holes is unspecified)
    local top = select('#', ...)
    local args = { ... }

    -- Validate everything first, so that no charset is created (and
    -- registered in the value table) for a call that is going to fail.
    -- The message is only built when it is needed.
    for i = 1, top do
        local range = args[i]
        if type(range) ~= 'string' or #range ~= 2 then
            error(tostring(range) .. " range must have two characters", 0)
        end
    end

    local tree, set = newcharset()
    for i = 1, top do
        local lo, hi = args[i]:byte(1, 2)
        for b = lo, hi do
            local word = rshift(b, 5)
            set[word] = bor(set[word], lshift(1, band(b, 31)))
        end
    end
    return tree
end
|
||
|
|
||
|
|
||
|
-- Look-behind predicate.  The pattern must have no captures and a fixed
-- length of at most MAXBEHIND; that length is stored in the root's 'val'.
local function lp_behind(pat)
    local tree1 = getpatt(pat)
    local n = lpcode.fixedlenx(tree1, 0, 0, 0)
    assert(not lpcode.hascaptures(tree1, 0), "pattern have captures")
    assert(n >= 0, "pattern may not have fixed length")
    assert(n <= MAXBEHIND, "pattern too long to look behind")
    -- Hand over the tree that was just checked.  Passing 'pat' again would
    -- make newroot1sib convert it a second time (strings and grammars would
    -- be built twice).
    local tree = newroot1sib(TBehind, tree1)
    tree.p[0].val = n
    return tree
end
|
||
|
|
||
|
|
||
|
-- Create a non-terminal: a single TOpenCall node that refers to rule 'val'
-- through the value table.  'p' (default 0) is stored in the node's 'cap'.
local function lp_V(val, p)
    assert(val, "non-nil value expected")
    local tree = treepattern(1)
    valuetable[tree.id] = { val }
    local node = tree.p[0]
    node.tag = TOpenCall
    node.val = 1
    node.cap = p or 0
    return tree
end
|
||
|
|
||
|
|
||
|
-- Create a tree for a non-empty capture of kind 'cap' with body 'pat'.
-- When 'val' is given (any value other than nil/false) it is appended to
-- the tree's value table and referenced from the capture node.
local function capture_aux(cap, pat, val)
    local tree = newroot1sib(TCapture, pat)
    tree.p[0].cap = cap
    if val then
        local keys = valuetable[tree.id]
        local ind = #keys + 1
        -- Plain message, as at every other site: the original appended the
        -- index to the text, which garbled it and built a new string on
        -- every call.
        assert(ind <= 65536, "too many Lua values in pattern")
        keys[ind] = val
        tree.p[0].val = ind
    end
    return tree
end
|
||
|
|
||
|
|
||
|
-- Write an empty capture of kind 'cap' at position 'start' of 'tree',
-- using an empty (TTrue) sibling as its body.  A non-nil 'par' is appended
-- to the tree's value table (which must already exist) and referenced from
-- the capture node.
local function auxemptycap(tree, cap, par, start)
    tree.p[start].tag = TCapture
    tree.p[start].cap = cap
    -- 'type(par)' rather than a truth test: false is a valid parameter
    if type(par) ~= 'nil' then
        local keys = valuetable[tree.id]
        local ind = #keys + 1
        assert(ind <= 65536, "too many Lua values in pattern")
        keys[ind] = par
        tree.p[start].val = ind
    end
    tree.p[start + 1].tag = TTrue
end
|
||
|
|
||
|
|
||
|
-- Create a two-node tree holding an empty capture of kind 'cap'.
-- A value table is created only when there is a parameter to store.
local function newemptycap(cap, par)
    local tree = treepattern(2)
    if type(par) ~= 'nil' then
        valuetable[tree.id] = {}
    end
    auxemptycap(tree, cap, par, 0)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Captures with syntax p / v.  The kind depends on the type of 'par':
--   function => function capture
--   table    => query capture
--   string   => string capture
--   number   => number capture (0 .. 0xffff)
-- Any other type raises "invalid replacement value".
-- The third parameter is not used.
local function lp_divcapture(pat, par, xxx)
    local typ = type(par)
    if typ == "function" then
        return capture_aux(Cfunction, pat, par)
    elseif typ == "table" then
        return capture_aux(Cquery, pat, par)
    elseif typ == "string" then
        return capture_aux(Cstring, pat, par)
    elseif typ == "number" then
        -- check the number before building anything, so that an invalid
        -- call does not leave a tree and a value table behind
        assert(0 <= par and par <= 0xffff, "invalid number")
        local tree = newroot1sib(TCapture, pat)
        tree.p[0].cap = Cnum
        local keys = valuetable[tree.id]
        local ind = #keys + 1
        assert(ind <= 65536, "too many Lua values in pattern")
        keys[ind] = par
        tree.p[0].val = ind
        return tree
    else
        error("invalid replacement value", 0)
    end
end
|
||
|
|
||
|
|
||
|
-- Substitution capture over 'pat'.
local function lp_substcapture(pat)
    local tree = capture_aux(Csubst, pat)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Table capture over 'pat'.  The value 0 is passed along; being truthy in
-- Lua, it is stored in the value table like any other capture value.
local function lp_tablecapture(pat)
    local tree = capture_aux(Ctable, pat, 0)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Group capture over 'pat'; 'val', when given, is stored with the capture.
local function lp_groupcapture(pat, val)
    if val then
        return capture_aux(Cgroup, pat, val)
    end
    return capture_aux(Cgroup, pat)
end
|
||
|
|
||
|
|
||
|
-- Fold capture over 'pat' using the function 'fce'.
local function lp_foldcapture(pat, fce)
    local isfunction = type(fce) == 'function'
    assert(isfunction)
    return capture_aux(Cfold, pat, fce)
end
|
||
|
|
||
|
|
||
|
-- Simple capture over 'pat'.
local function lp_simplecapture(pat)
    local tree = capture_aux(Csimple, pat)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Position capture: an empty capture of kind Cposition.
local function lp_poscapture()
    local tree = newemptycap(Cposition)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Argument capture: an empty capture of kind Carg referring to extra
-- argument number 'val' (1 .. 0xffff).
local function lp_argcapture(val)
    assert(type(val) == 'number')
    -- validate the index before creating the tree, so that an invalid call
    -- does not leave a tree and a value table entry behind
    assert(0 < val and val <= 0xffff, "invalid argument index")
    local tree = newemptycap(Carg, 0)
    local keys = valuetable[tree.id]
    local ind = #keys + 1
    assert(ind <= 65536, "too many Lua values in pattern")
    keys[ind] = val
    tree.p[0].val = ind
    return tree
end
|
||
|
|
||
|
|
||
|
-- Back reference: an empty capture of kind Cbackref carrying 'val'.
local function lp_backref(val)
    local tree = newemptycap(Cbackref, val)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Constant capture.
--   no values  => TTrue (no capture at all)
--   one value  => a single constant capture
--   more       => a group capture around a sequence of constant captures
local function lp_constcapture(...)
    local n = select('#', ...) -- number of values
    local args = { ... }

    if n == 0 then
        local tree = treepattern(1) -- no capture
        tree.p[0].tag = TTrue
        return tree
    end

    if n == 1 then
        return newemptycap(Cconst, args[1]) -- single constant capture
    end

    -- create a group capture with all values
    local tree = treepattern(3 + 3 * (n - 1))
    valuetable[tree.id] = {}
    tree.p[0].tag = TCapture
    tree.p[0].cap = Cgroup
    local pos = 1
    for i = 1, n - 1 do
        -- seq (constant capture) (rest)
        tree.p[pos].tag = TSeq
        tree.p[pos].ps = 3
        auxemptycap(tree, Cconst, args[i], pos + 1)
        pos = pos + tree.p[pos].ps
    end
    auxemptycap(tree, Cconst, args[n], pos)
    return tree
end
|
||
|
|
||
|
|
||
|
-- Match-time (run-time) capture over 'pat' calling the function 'fce'.
-- When 'name' is a string it is recorded in 'funcnames' for that function.
local function lp_matchtime(pat, fce, name)
    assert(type(fce) == 'function')
    if type(name) == 'string' then
        funcnames[fce] = name
    end
    local tree = newroot1sib(TRunTime, pat)
    local keys = valuetable[tree.id]
    local ind = #keys + 1
    assert(ind <= 65536, "too many Lua values in pattern")
    keys[ind] = fce
    tree.p[0].val = ind
    return tree
end
|
||
|
|
||
|
-- ======================================================
|
||
|
|
||
|
|
||
|
|
||
|
-- ======================================================
|
||
|
-- Grammar - Tree generation
|
||
|
-- =======================================================
|
||
|
|
||
|
|
||
|
-- Find the initial rule of grammar table 'pat'.  If pat[1] is a string it
-- names the initial rule; otherwise pat[1] itself is the initial rule.
-- Records position 1 for the rule's key in 'postab'.
-- Returns the key and the pattern of the initial rule; raises an error when
-- the rule is missing or is not a pattern.
local function getfirstrule(pat, postab)
    local first = pat[1] -- access first element
    local key = 1
    if type(first) == 'string' then
        key = first
    end
    local rule = pat[key]
    if not rule then
        error("grammar has no initial rule", 0)
    end
    -- initial rule not a pattern?
    if not ffi.istype(treepattern, rule) then
        error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0)
    end
    postab[key] = 1
    return key, rule
end
|
||
|
|
||
|
|
||
|
-- Traverse grammar table 'pat' and collect all its keys and patterns.
-- Returns:
--   n       number of rules
--   size    total number of nodes needed for the grammar tree
--   rules   flat array { key1, pattern1, key2, pattern2, ... } with the
--           initial rule first
--   postab  the "position table": key -> position of the rule in the tree
-- Raises an error when a rule is not a pattern.
local function collectrules(pat)
    local postab = {}
    local firstkey, firstrule = getfirstrule(pat, postab)
    local rules = { firstkey, firstrule }
    local n = 1 -- to count number of rules
    local size = 2 + firstrule.treesize -- TGrammar + TRule + rule
    for key, val in pairs(pat) do
        -- Skip the entries that define the initial rule: pat[1], and the
        -- named entry when pat[1] is a name.  The decision is made on the
        -- KEY; comparing the values would also drop any other rule that
        -- happens to share the initial rule's pattern object.
        if key ~= 1 and key ~= firstkey then
            -- value is not a pattern?
            if not ffi.istype(treepattern, val) then
                error(("rule '%s' is not a pattern"):format(tostring(key)), 0)
            end
            rules[#rules + 1] = key
            rules[#rules + 1] = val
            postab[key] = size
            size = 1 + size + val.treesize -- TRule + rule
            n = n + 1
        end
    end
    size = size + 1 -- TTrue to finish list of rules
    return n, size, rules, postab
end
|
||
|
|
||
|
|
||
|
-- Copy the 'n' rules of 'rules' (flat { key, pattern, ... } array) into
-- 'grammar', starting at position 'index', as a list of TRule nodes closed
-- by a TTrue node.  'valuetable' maps pattern ids to their value tables.
-- Returns the value table of the new grammar: for each rule, its name
-- followed by the values of its pattern.
local function buildgrammar(grammar, rules, n, index, valuetable)
    local keys, shift = {}, 0
    local elemsize = ffi.sizeof(treepatternelement)
    local pos = index
    -- add each rule into new tree
    for i = 1, n do
        local name = rules[2 * i - 1]
        local body = rules[2 * i]
        local size = body.treesize

        grammar.p[pos].tag = TRule
        grammar.p[pos].cap = i -- rule number
        grammar.p[pos].ps = size + 1 -- point to next rule

        local ind = #keys + 1
        keys[ind] = name
        grammar.p[pos].val = ind

        ffi.copy(grammar.p + pos + 1, body.p, elemsize * size) -- copy rule
        keys, shift = copykeys(keys, valuetable[body.id])
        if shift > 0 then
            correctkeys(grammar, pos + 1, shift)
        end
        pos = pos + grammar.p[pos].ps -- move to next rule
    end
    grammar.p[pos].tag = TTrue -- finish list of rules
    return keys
end
|
||
|
|
||
|
|
||
|
-- Check whether the subtree at 'index' has a potential infinite loop, i.e.
-- a repetition whose body may match the empty string.
-- Returns true if so, nil otherwise.
local function checkloops(tree, index)
    local tag = tree.p[index].tag
    if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then
        return true
    end
    if tag == TGrammar then
        return -- sub-grammars already checked
    end

    local ns = numsiblings[tag + 1]
    if ns == 0 then
        return
    end
    if ns == 1 then
        return checkloops(tree, index + 1)
    end
    if ns == 2 then
        if checkloops(tree, index + 1) then
            return true
        end
        return checkloops(tree, index + tree.p[index].ps)
    end
    assert(false)
end
|
||
|
|
||
|
-- Check whether rule 'rulename' can be left recursive, walking the subtree
-- at 'index'.  Returns PEleftrecursion when a call to 'rulename' is found
-- on a path the walk follows; otherwise returns a truthy value iff the
-- walk considers the pattern nullable.
--   passed     table counting how often each rule was entered
--   nullable   value to return when the walk cannot pass through a node
--   valuetable value table of 'tree'
local function verifyrule(rulename, tree, passed, nullable, index, valuetable)
    local tag = tree.p[index].tag

    if tag == TTrue or tag == TBehind then
        return true
    end

    if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
        return nullable -- cannot pass from here
    end

    if tag == TCapture or tag == TRunTime then
        return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
    end

    if tag == TNot or tag == TAnd or tag == TRep then
        return verifyrule(rulename, tree, passed, true, index + 1, valuetable)
    end

    if tag == TCall then
        local called = valuetable[tree.p[index].val]
        if called == rulename then
            return PEleftrecursion
        end
        if passed[called] and passed[called] > MAXRULES then
            return nullable
        end
        -- follow the call into the rule it refers to
        return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
    end

    if tag == TSeq then
        -- only check 2nd child if first is nullable
        local first = verifyrule(rulename, tree, passed, false, index + 1, valuetable)
        if first == PEleftrecursion then
            return first
        end
        if not first then
            return nullable
        end
        return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
    end

    if tag == TChoice then
        -- must check both children
        nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
        if nullable == PEleftrecursion then
            return nullable
        end
        return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
    end

    if tag == TRule then
        local entered = valuetable[tree.p[index].val]
        passed[entered] = (passed[entered] or 0) + 1
        return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
    end

    if tag == TGrammar then
        return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive
    end

    assert(false)
end
|
||
|
|
||
|
|
||
|
-- Verify the grammar whose TGrammar node is at 'index' of 'rule':
--   1. find left-recursive rules; raise an error for the first one unless
--      LREnable is set, otherwise remember them;
--   2. flag remembered rules (RuleLR in 'cap') and calls to them;
--   3. raise an error for rules containing a loop over a nullable body.
-- 'valuetable' is the value table of the grammar.
local function verifygrammar(rule, index, valuetable)
    -- check left-recursive rules
    local LR = {}
    local ind = index + 1
    while rule.p[ind].tag == TRule do
        local rulename = valuetable[rule.p[ind].val]
        -- used rule
        if rulename then
            if verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then
                if not LREnable then
                    -- tostring: rule names need not be strings (same
                    -- treatment as the empty-loop message below)
                    error(("rule '%s' may be left recursive"):format(tostring(rulename)), 0)
                end
                LR[rulename] = true
            end
        end
        ind = ind + rule.p[ind].ps
    end
    assert(rule.p[ind].tag == TTrue)

    for i = 0, rule.treesize - 1 do
        local tag = rule.p[i].tag
        if tag == TRule then
            if LR[valuetable[rule.p[i].val]] then
                rule.p[i].cap = bor(rule.p[i].cap, RuleLR) -- TRule can be left recursive
            end
        elseif tag == TCall then
            if LR[valuetable[rule.p[i].val]] and rule.p[i].cap == 0 then
                rule.p[i].cap = 1 -- TCall can be left recursive
            end
        end
    end

    -- check infinite loops inside rules
    ind = index + 1
    while rule.p[ind].tag == TRule do
        -- used rule: 'val' is a number, so it has to be compared with 0
        -- (a bare truth test is always true in Lua)
        if rule.p[ind].val ~= 0 then
            if checkloops(rule, ind + 1) then
                error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0)
            end
        end
        ind = ind + rule.p[ind].ps
    end
    assert(rule.p[ind].tag == TTrue)
end
|
||
|
|
||
|
|
||
|
-- Mark the initial rule (node 1 of 'grammar') as used and, if it has no
-- entry in the value table yet, register 'val' as its name.
local function initialrulename(grammar, val, valuetable)
    local first = grammar.p[1]
    first.cap = bit.bor(first.cap, Ruleused)
    if first.val ~= 0 then
        return -- already has a name
    end
    -- initial rule is not referenced
    local ind = #valuetable + 1
    assert(ind <= 65536, "too many Lua values in pattern")
    valuetable[ind] = val
    first.val = ind
end
|
||
|
|
||
|
|
||
|
--- Build a grammar tree from `pat`.
-- `collectrules` supplies the rule count, the tree size, the rule list and
-- the position table; the tree is then filled by `buildgrammar`, fixed by
-- `finalfix`, given an initial rule name and finally verified.
-- NOTE(review): defined without `local` here - presumably forward-declared
-- earlier in the file; confirm.
-- @param pat  grammar description handed to `collectrules`
-- @return the new grammar tree pattern
function newgrammar(pat)
    local nrules, treesize, rules, postab = collectrules(pat)
    local grammar = treepattern(treesize)
    local root = 0
    local id = grammar.id

    local head = grammar.p[root]
    head.tag = TGrammar
    head.val = nrules

    valuetable[id] = buildgrammar(grammar, rules, nrules, root + 1, valuetable)
    finalfix(true, postab, grammar, root + 1, valuetable[id])
    initialrulename(grammar, rules[1], valuetable[id])
    verifygrammar(grammar, 0, valuetable[id])
    return grammar
end
|
||
|
|
||
|
-- ======================================================
|
||
|
|
||
|
--- Remove duplicate entries from the value table of a pattern.
-- Builds a compacted table in which every distinct value occurs once,
-- rewrites the value references stored in the tree nodes and in the
-- compiled instructions to the new slots, and installs the compacted table
-- as `valuetable[p.id]`. Slots are numbered in order of first visit: tree
-- nodes first, then instructions.
-- @param p  tree pattern (optionally already compiled)
local function reducevaluetable(p)
    local oldvalues = valuetable[p.id]
    local slotof = {}   -- value -> slot in the compacted table
    local compact = {}  -- the compacted value table

    -- Translate an old reference into its new slot; references <= 0 map to 0.
    local function remap(v)
        if not (v > 0) then
            return 0
        end
        local item = oldvalues[v]
        local slot = slotof[item]
        if not slot then
            compact[#compact + 1] = item
            slot = #compact
            slotof[item] = slot
        end
        return slot
    end

    -- Preorder walk: first child recursively, second sibling by looping.
    local function walk(pos)
        while true do
            local node = p.p[pos]
            local tag = node.tag
            if tag == TSet or tag == TCall or tag == TOpenCall or
                    tag == TRule or tag == TCapture or tag == TRunTime then
                node.val = remap(node.val)
            end
            local ns = numsiblings[tag + 1]
            if ns == 0 then
                return
            elseif ns == 1 then
                pos = pos + 1
            elseif ns == 2 then
                walk(pos + 1)
                pos = pos + node.ps
            else
                assert(false)
                return
            end
        end
    end

    if p.treesize > 0 then
        walk(0)
    end

    if p.code ~= nil then
        local last = p.code.size - 1
        for i = 0, last do
            local inst = p.code.p[i]
            local op = inst.code
            if op == ICall or op == IJmp then
                inst.aux = remap(inst.aux)
            elseif op == ISet or op == ITestSet or op == ISpan then
                inst.val = remap(inst.val)
            elseif op == IOpenCapture or op == IFullCapture then
                inst.offset = remap(inst.offset)
            end
        end
    end

    valuetable[p.id] = compact
end
|
||
|
|
||
|
|
||
|
--- Flag group captures with the `BCapcandelete` bit.
-- Walks the tree in preorder starting at node 0. A group capture (TCapture
-- whose low 16 `cap` bits equal `Cgroup`) found below a TChoice is flagged
-- immediately. One found outside any choice but inside a rule is remembered
-- per rule and flagged afterwards, unless that rule value was marked during
-- the walk (a TRule whose low 16 `cap` bits equal 1, or a TCall seen
-- outside a choice).
-- @param tree  array of tree nodes (indexed from 0)
local function checkalt(tree)
    local pending = {}   -- rule value -> index of its last unflagged group capture
    local excluded = {}  -- rule values whose pending capture must stay unflagged

    local function walk(pos, inchoice, currule)
        while true do
            local node = tree[pos]
            local tag = node.tag

            if tag == TCapture and bit.band(node.cap, 0xffff) == Cgroup then
                if inchoice then
                    node.cap = bit.bor(node.cap, BCapcandelete)
                elseif currule then
                    pending[currule] = pos
                end
            elseif tag == TChoice then
                inchoice = true
            elseif tag == TRule then
                currule = node.val
                if bit.band(node.cap, 0xffff) == 1 then
                    excluded[currule] = true
                end
            elseif tag == TCall then
                if not inchoice then
                    excluded[node.val] = true
                end
            end

            local sibs = numsiblings[tag + 1] or 0
            if sibs < 1 then
                return
            end
            walk(pos + 1, inchoice, currule)
            if sibs < 2 then
                return
            end
            -- continue with the second sibling, keeping the current state
            pos = pos + node.ps
        end
    end

    walk(0)

    for rule, pos in pairs(pending) do
        if not excluded[rule] then
            tree[pos].cap = bit.bor(tree[pos].cap, BCapcandelete)
        end
    end
end
|
||
|
|
||
|
|
||
|
--- Prepare and compile a pattern.
-- Runs, in this order: `finalfix` on the tree, `checkalt` on its nodes,
-- `lpcode.compile`, and `reducevaluetable`.
-- @param p      tree pattern to compile
-- @param index  tree position handed to `finalfix` and `lpcode.compile`
-- @return `p.code` as it is after compilation
local function prepcompile(p, index)
    local id = p.id
    finalfix(false, nil, p, index, valuetable[id])
    checkalt(p.p)
    lpcode.compile(p, index, valuetable[id])
    -- must run last: it rewrites value references in both tree and code
    reducevaluetable(p)
    return p.code
end
|
||
|
|
||
|
|
||
|
--- Print the tree of a pattern through `lpprint.printtree`.
-- @param pat  tree pattern; its `treesize` must be greater than 0
-- @param c    when truthy, `finalfix` is applied to the tree before printing
local function lp_printtree(pat, c)
    assert(pat.treesize > 0)
    local id = pat.id
    if c then
        finalfix(false, nil, pat, 0, valuetable[id])
    end
    lpprint.printtree(pat.p, 0, 0, valuetable[id])
end
|
||
|
|
||
|
|
||
|
--- Print the compiled code of a pattern through `lpprint.printpatt`.
-- The pattern is compiled first when it has no code yet.
-- @param pat  tree pattern
local function lp_printcode(pat)
    local compiled = pat.code ~= nil
    if not compiled then
        prepcompile(pat, 0)
    end
    -- the value table is looked up only now, after any compilation
    lpprint.printpatt(pat.code, valuetable[pat.id])
end
|
||
|
|
||
|
|
||
|
--- Main match function.
-- Values that are not tree patterns are converted with `getpatt`; the
-- pattern is compiled on first use and then handed to `lpvm.match`.
-- @param pat   tree pattern, or a value accepted by `getpatt`
-- @param s     subject passed to `lpvm.match`
-- @param init  start position passed to `lpvm.match`
-- @param ...   extra arguments forwarded to `lpvm.match`
-- @return whatever `lpvm.match` returns
local function lp_match(pat, s, init, ...)
    local p
    if ffi.istype(treepattern, pat) then
        p = pat
    else
        p = getpatt(pat)
    end
    if p.code == nil then
        p.code = prepcompile(p, 0)
    end
    return lpvm.match(p, s, init, valuetable[p.id], ...)
end
|
||
|
|
||
|
--- Stream variant of the match function.
-- Values that are not tree patterns are converted with `getpatt`; the
-- pattern is compiled on first use and then handed to `lpvm.streammatch`.
-- @param pat   tree pattern, or a value accepted by `getpatt`
-- @param init  start position passed to `lpvm.streammatch`
-- @param ...   extra arguments forwarded to `lpvm.streammatch`
-- @return whatever `lpvm.streammatch` returns
local function lp_streammatch(pat, init, ...)
    local p
    if ffi.istype(treepattern, pat) then
        p = pat
    else
        p = getpatt(pat)
    end
    if p.code == nil then
        p.code = prepcompile(p, 0)
    end
    return lpvm.streammatch(p, init, valuetable[p.id], ...)
end
|
||
|
|
||
|
--- Stream emulation, intended for testing only: the subject string is fed
-- to the matcher one character after another.
-- Values that are not tree patterns are converted with `getpatt`; the
-- pattern is compiled on first use and then handed to
-- `lpvm.emulatestreammatch`.
-- @param pat   tree pattern, or a value accepted by `getpatt`
-- @param s     subject string
-- @param init  start position
-- @param ...   extra arguments forwarded to `lpvm.emulatestreammatch`
-- @return whatever `lpvm.emulatestreammatch` returns
local function lp_emulatestreammatch(pat, s, init, ...)
    local p
    if ffi.istype(treepattern, pat) then
        p = pat
    else
        p = getpatt(pat)
    end
    if p.code == nil then
        p.code = prepcompile(p, 0)
    end
    return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...)
end
|
||
|
|
||
|
-- {======================================================
|
||
|
-- Library creation and functions not related to matching
|
||
|
-- =======================================================
|
||
|
|
||
|
--- Forward a new limit to `lpvm.setmax` (exported as `setmaxstack`).
-- Nothing is returned, regardless of what `lpvm.setmax` yields.
-- @param val  value handed to `lpvm.setmax` unchanged
local function lp_setmax(val)
    lpvm.setmax(val)
end
|
||
|
|
||
|
--- Forward a new limit to `lpvm.setmaxbehind`.
-- Nothing is returned, regardless of what `lpvm.setmaxbehind` yields.
-- @param val  value handed to `lpvm.setmaxbehind` unchanged
local function lp_setmaxbehind(val)
    lpvm.setmaxbehind(val)
end
|
||
|
|
||
|
--- Store `val` in `LREnable`, the flag that `verifygrammar` consults before
-- raising a "may be left recursive" error.
-- @param val  truthy to allow left-recursive rules, falsy to reject them
local function lp_enableleftrecursion(val)
    LREnable = val
end
|
||
|
|
||
|
--- Report the library version.
-- @return the value of `VERSION`
local function lp_version()
    return VERSION
end
|
||
|
|
||
|
|
||
|
--- Tell whether a value is a pattern.
-- @param pat  any value
-- @return the string "pattern" when `pat` is a tree pattern, otherwise nil
local function lp_type(pat)
    if ffi.istype(treepattern, pat) then
        return "pattern"
    end
    -- Return an explicit nil rather than no value, so that calls such as
    -- tostring(lp_type(x)) or { lp_type(x) } behave as with LPeg.
    return nil
end
|
||
|
|
||
|
|
||
|
--- Build a character-set pattern for one character category.
-- A fresh set is obtained from `newcharset`; for every byte value 0..255
-- for which `catfce` returns something other than 0, the matching bit is
-- set (32 bits per word). The resulting pattern is stored in `tab`.
-- @param tab      table receiving the pattern
-- @param catname  key under which the pattern is stored
-- @param catfce   predicate called with the byte value; a non-zero result
--                 means the byte belongs to the category
local function createcat(tab, catname, catfce)
    local charset, bits = newcharset()
    for ch = 0, 255 do
        if catfce(ch) ~= 0 then
            local word = rshift(ch, 5)
            local mask = lshift(1, band(ch, 31))
            bits[word] = bor(bits[word], mask)
        end
    end
    tab[catname] = charset
end
|
||
|
|
||
|
|
||
|
--- Fill a table with one character-set pattern per C character class.
-- For each class name below, the C function `is<name>` is used as the
-- membership test and the pattern is stored under that name.
-- @param tab  table to fill; a new table is created when it is nil/false
-- @return the filled table
local function lp_locale(tab)
    tab = tab or {}
    -- kept in the original creation order
    local classes = {
        "alnum", "alpha", "cntrl", "digit", "graph", "lower",
        "print", "punct", "space", "upper", "xdigit",
    }
    for _, class in ipairs(classes) do
        local cname = "is" .. class
        createcat(tab, class, function(c) return ffi.C[cname](c) end)
    end
    return tab
end
|
||
|
|
||
|
|
||
|
--- Constructor registered as the `__new` metamethod of the pattern type.
-- Allocates the object with `ffi.new`, records its tree size and assigns
-- it the next value of the module-wide `patternid` counter.
-- @param ct    ctype to instantiate
-- @param size  number of tree nodes, passed to `ffi.new`
-- @return the new pattern object
local function lp_new(ct, size)
    local pat = ffi.new(ct, size)
    local nextid = patternid + 1
    patternid = nextid
    pat.treesize = size
    pat.id = nextid
    return pat
end
|
||
|
|
||
|
|
||
|
--- Finalizer registered as the `__gc` metamethod of the pattern type.
-- Drops the pattern's entry from `valuetable` and, when compiled code is
-- attached, releases the instruction array and then the code object with
-- `ffi.C.free`.
-- @param ct  pattern object being collected
local function lp_gc(ct)
    valuetable[ct.id] = nil
    local code = ct.code
    if code == nil then
        return
    end
    ffi.C.free(code.p)
    ffi.C.free(code)
end
|
||
|
|
||
|
--- Equality test registered as the `__eq` metamethod of the pattern type.
-- Two operands are equal when their `tostring` representations are equal.
-- @param ct1  first operand
-- @param ct2  second operand
-- @return true or false
local function lp_eq(ct1, ct2)
    local left, right = tostring(ct1), tostring(ct2)
    return left == right
end
|
||
|
|
||
|
--- Load a pattern with `lpvm.load` and register its value table.
-- @param str     data handed to `lpvm.load`
-- @param fcetab  function table handed to `lpvm.load`
-- @return the loaded pattern
local function lp_load(str, fcetab)
    local pat, values = lpvm.load(str, fcetab, true)
    valuetable[pat.id] = values
    return pat
end
|
||
|
|
||
|
--- Load a pattern with `lpvm.loadfile` and register its value table.
-- @param fname   file name handed to `lpvm.loadfile`
-- @param fcetab  function table handed to `lpvm.loadfile`
-- @return the loaded pattern
local function lp_loadfile(fname, fcetab)
    local pat, values = lpvm.loadfile(fname, fcetab, true)
    valuetable[pat.id] = values
    return pat
end
|
||
|
|
||
|
--- Serialize a pattern into a binary string.
-- The pattern is compiled first when it has no code yet. Layout of the
-- result (every size is a 4-byte uint32 in host byte order):
--   tree size (0 when `tree` is falsy) followed by the raw tree nodes,
--   code size followed by the raw instructions,
--   number of values followed by one record per value:
--     'str' size bytes            - string
--     'num' size text             - number, written with tostring
--     'cdt' raw bytes             - cdata (ffi.sizeof bytes, no size prefix)
--     'fnc' size name size dump   - function (string.dump, stripped)
-- Functions not registered in `funcnames` are named FNAMEnnn; if such a
-- function has an upvalue a warning is printed, since `string.dump` does
-- not preserve upvalues.
-- @param ct    tree pattern
-- @param tree  truthy to include the tree in the dump
-- @return the serialized pattern as a string
-- Raises an error for value types other than the four listed above.
local function lp_dump(ct, tree)
    -- not compiled yet?
    if ct.code == nil then
        prepcompile(ct, 0)
    end

    local out = {}
    local function put(chunk)
        out[#out + 1] = chunk
    end
    local function putsize(n)
        put(ffi.string(uint32(n), 4))
    end
    -- length-prefixed string
    local function putsized(s)
        putsize(#s)
        put(s)
    end

    if tree then
        putsize(ct.treesize)
        put(ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize))
    else
        putsize(0)
    end
    putsize(ct.code.size)
    put(ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement)))

    local t = valuetable[ct.id]
    local len = t and #t or 0
    putsize(len)

    local funccount = 0
    if len > 0 then
        for _, val in ipairs(t) do
            local typ = type(val)
            if typ == 'string' then
                put('str')
                putsized(val)
            elseif typ == 'number' then
                put('num')
                putsized(tostring(val))
            elseif typ == 'cdata' then
                put('cdt')
                put(ffi.string(val, ffi.sizeof(val)))
            elseif typ == 'function' then
                put('fnc')
                funccount = funccount + 1
                local known = funcnames[val]
                local name = known or ('FNAME%03d'):format(funccount)
                putsized(name)
                if not known then
                    local upname = debug.getupvalue(val, 1)
                    if upname then
                        -- only the message is written; the former extra
                        -- argument 0 made io.write print a stray "0"
                        io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, upname, name))
                    end
                end
                putsized(string.dump(val, true))
            else
                error(("Type '%s' NYI for dump"):format(typ), 0)
            end
        end
    end
    return table.concat(out)
end
|
||
|
|
||
|
--- Write the dump of a pattern to a file.
-- The dump is produced before the file is opened, so an error while
-- dumping neither truncates an existing file nor leaves a handle open.
-- @param ct     tree pattern
-- @param fname  name of the file, opened in 'wb' mode
-- @param tree   truthy to include the tree (see `lp_dump`)
-- Raises an error when the file cannot be opened or written.
local function lp_save(ct, fname, tree)
    local data = lp_dump(ct, tree)
    local file = assert(io.open(fname, 'wb'))
    local ok, err = file:write(data)
    -- close before reporting a failed write so the handle never leaks
    file:close()
    assert(ok, err)
end
|
||
|
|
||
|
|
||
|
-- Public interface of the module. The table is returned by the module and
-- also serves as `__index` of the pattern type, so every entry is reachable
-- both as a library function and as a method of a pattern.
local pattreg = {
    -- printing and matching
    ptree = lp_printtree,
    pcode = lp_printcode,
    match = lp_match,
    streammatch = lp_streammatch,
    emulatestreammatch = lp_emulatestreammatch,
    setmaxbehind = lp_setmaxbehind,
    -- pattern and capture constructors
    B = lp_behind,
    V = lp_V,
    C = lp_simplecapture,
    Cc = lp_constcapture,
    Cmt = lp_matchtime,
    Cb = lp_backref,
    Carg = lp_argcapture,
    Cp = lp_poscapture,
    Cs = lp_substcapture,
    Ct = lp_tablecapture,
    Cf = lp_foldcapture,
    Cg = lp_groupcapture,
    P = lp_P,
    S = lp_set,
    R = lp_range,
    L = lp_and,
    -- library settings and information
    locale = lp_locale,
    version = lp_version,
    setmaxstack = lp_setmax,
    type = lp_type,
    enableleftrecursion = lp_enableleftrecursion,
    enablememoization = lpvm.enablememoization,
    enabletracing = lpvm.enabletracing,
    -- serialization
    save = lp_save,
    dump = lp_dump,
    load = lp_load,
    loadfile = lp_loadfile,
    -- operators
    __mul = lp_seq,
    __add = lp_choice,
    __pow = lp_star,
    __len = lp_and,
    __div = lp_divcapture,
    __unm = lp_not,
    __sub = lp_sub,
}
|
||
|
|
||
|
-- Metatable of the pattern type: lifecycle hooks, operators, equality, and
-- method lookup through the public interface table.
local metareg = {
    __gc = lp_gc,
    __new = lp_new,
    __mul = lp_seq,
    __add = lp_choice,
    __pow = lp_star,
    __len = lp_and,
    __div = lp_divcapture,
    __unm = lp_not,
    __sub = lp_sub,
    __eq = lp_eq,
    __index = pattreg,
}

-- Attach the metatable to the pattern ctype.
ffi.metatype(treepattern, metareg)

-- The module's value is the public interface table.
return pattreg
|