626 lines
21 KiB
Lua
Raw Normal View History

2022-05-28 09:30:19 +02:00
--[[
LPEGLJ
lpcap.lua
Capture functions
Copyright (C) 2014 Rostislav Sacek.
based on LPeg v1.0 - PEG pattern matching for Lua
Lua.org & PUC-Rio written by Roberto Ierusalimschy
http://www.inf.puc-rio.br/~roberto/lpeg/
** Permission is hereby granted, free of charge, to any person obtaining
** a copy of this software and associated documentation files (the
** "Software"), to deal in the Software without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Software, and to
** permit persons to whom the Software is furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be
** included in all copies or substantial portions of the Software.
**
** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**
** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
--]]
local ffi = require "ffi"
local Cclose = 0
local Cposition = 1
local Cconst = 2
local Cbackref = 3
local Carg = 4
local Csimple = 5
local Ctable = 6
local Cfunction = 7
local Cquery = 8
local Cstring = 9
local Cnum = 10
local Csubst = 11
local Cfold = 12
local Cruntime = 13
local Cgroup = 14
local MAXSTRCAPS = 10
local pushcapture
local addonestring
-- Goes back in a list of captures looking for an open capture
-- corresponding to a close
local function findopen(cs, index)
local n = 0; -- number of closes waiting an open
while true do
index = index - 1
if cs.ocap[index].kind == Cclose then
n = n + 1 -- one more open to skip
elseif cs.ocap[index].siz == 0 then
if n == 0 then
return index
end
n = n - 1
end
end
end
local function checknextcap(cs, captop)
local cap = cs.cap;
-- not a single capture? ((cap)->siz != 0)
if cs.ocap[cap].siz == 0 then
local n = 0; -- number of opens waiting a close
-- look for corresponding close
while true do
cap = cap + 1
if cap > captop then return end
if cs.ocap[cap].kind == Cclose then
n = n - 1
if n + 1 == 0 then
break;
end
elseif cs.ocap[cap].siz == 0 then
n = n + 1
end
end
end
cap = cap + 1; -- + 1 to skip last close (or entire single capture)
if cap > captop then return end
return true
end
-- Go to the next capture
local function nextcap(cs)
local cap = cs.cap;
-- not a single capture? ((cap)->siz != 0)
if cs.ocap[cap].siz == 0 then
local n = 0; -- number of opens waiting a close
-- look for corresponding close
while true do
cap = cap + 1
if cs.ocap[cap].kind == Cclose then
n = n - 1
if n + 1 == 0 then
break;
end
elseif cs.ocap[cap].siz == 0 then
n = n + 1
end
end
end
cs.cap = cap + 1; -- + 1 to skip last close (or entire single capture)
end
-- Push on the Lua stack all values generated by nested captures inside
-- the current capture. Returns number of values pushed. 'addextra'
-- makes it push the entire match after all captured values. The
-- entire match is pushed also if there are no other nested values,
-- so the function never returns zero.
local function pushnestedvalues(cs, addextra, out, valuetable)
local co = cs.cap
cs.cap = cs.cap + 1
-- no nested captures?
if cs.ocap[cs.cap - 1].siz ~= 0 then
local st = cs.ocap[co].s
local l = cs.ocap[co].siz - 1
out.outindex = out.outindex + 1
out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1)
return 1; -- that is it
else
local n = 0;
while cs.ocap[cs.cap].kind ~= Cclose do -- repeat for all nested patterns
n = n + pushcapture(cs, out, valuetable);
end
-- need extra?
if addextra or n == 0 then
local st = cs.ocap[co].s
local l = cs.ocap[cs.cap].s - cs.ocap[co].s
out.outindex = out.outindex + 1
out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1)
n = n + 1
end
cs.cap = cs.cap + 1 -- skip close entry
return n;
end
end
-- Push only the first value generated by nested captures
local function pushonenestedvalue(cs, out, valuetable)
local n = pushnestedvalues(cs, false, out, valuetable)
for i = n, 2, -1 do
out.out[out.outindex] = nil
out.outindex = out.outindex - 1
end
end
-- Try to find a named group capture with the name given at the top of
-- the stack; goes backward from 'cap'.
local function findback(cs, cap, name, valuetable)
-- repeat until end of list
while cap > 0 do
cap = cap - 1
local continue
if cs.ocap[cap].kind == Cclose then
cap = findopen(cs, cap); -- skip nested captures
elseif cs.ocap[cap].siz == 0 then
continue = true -- opening an enclosing capture: skip and get previous
end
if not continue and cs.ocap[cap].kind == Cgroup and cs.ocap[cap].idx ~= 0 then
local gname = valuetable[cs.ocap[cap].idx] -- get group name
-- right group?
if name == gname then
return cap;
end
end
end
error(("back reference '%s' not found"):format(name), 0)
end
-- Back-reference capture. Return number of values pushed.
local function backrefcap(cs, out, valuetable)
local curr = cs.cap;
local name = valuetable[cs.ocap[cs.cap].idx] -- reference name
cs.cap = findback(cs, curr, name, valuetable) -- find corresponding group
local n = pushnestedvalues(cs, false, out, valuetable); -- push group's values
cs.cap = curr + 1;
return n;
end
-- Table capture: creates a new table and populates it with nested
-- captures.
local function tablecap(cs, out, valuetable)
local n = 0;
local t = {}
cs.cap = cs.cap + 1
-- table is empty
if cs.ocap[cs.cap - 1].siz == 0 then
while cs.ocap[cs.cap].kind ~= Cclose do
local subout = { outindex = 0, out = {} }
-- named group?
if cs.ocap[cs.cap].kind == Cgroup and cs.ocap[cs.cap].idx ~= 0 then
local groupname = valuetable[cs.ocap[cs.cap].idx] -- push group name
pushonenestedvalue(cs, subout, valuetable)
t[groupname] = subout.out[1]
else
-- not a named group
local k = pushcapture(cs, subout, valuetable)
-- store all values into table
for i = 1, subout.outindex do
t[i + n] = subout.out[i]
end
n = n + k;
end
end
cs.cap = cs.cap + 1 -- skip close entry
end
out.outindex = out.outindex + 1
out.out[out.outindex] = t
return 1; -- number of values pushed (only the table)
end
-- Table-query capture
local function querycap(cs, out, valuetable)
local table = valuetable[cs.ocap[cs.cap].idx]
local subout = { outindex = 0, out = {} }
pushonenestedvalue(cs, subout, valuetable) -- get nested capture
-- query cap. value at table
if table[subout.out[1]] ~= nil then
out.outindex = out.outindex + 1
out.out[out.outindex] = table[subout.out[1]]
return 1
end
return 0
end
-- Fold capture
local function foldcap(cs, out, valuetable)
local fce = valuetable[cs.ocap[cs.cap].idx]
cs.cap = cs.cap + 1
-- no nested captures?
-- or no nested captures (large subject)?
if cs.ocap[cs.cap - 1].siz ~= 0 or
cs.ocap[cs.cap].kind == Cclose then
error("no initial value for fold capture", 0);
end
local subout = { outindex = 0; out = {} }
local n = pushcapture(cs, subout, valuetable) -- nested captures with no values?
if n == 0 then
error("no initial value for fold capture", 0);
end
local acumulator = subout.out[1] -- leave only one result for accumulator
while cs.ocap[cs.cap].kind ~= Cclose do
local subout = { outindex = 0; out = {} }
n = pushcapture(cs, subout, valuetable); -- get next capture's values
acumulator = fce(acumulator, unpack(subout.out, 1, subout.outindex)) -- call folding function
end
cs.cap = cs.cap + 1; -- skip close entry
out.outindex = out.outindex + 1
out.out[out.outindex] = acumulator
return 1; -- only accumulator left on the stack
end
local function retcount(...)
return select('#', ...), { ... }
end
-- Function capture
local function functioncap(cs, out, valuetable)
local fce = valuetable[cs.ocap[cs.cap].idx] -- push function
local subout = { outindex = 0, out = {} }
local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures
local count, ret = retcount(fce(unpack(subout.out, 1, n))) -- call function
for i = 1, count do
out.outindex = out.outindex + 1
out.out[out.outindex] = ret[i]
end
return count
end
-- Select capture
local function numcap(cs, out, valuetable)
local idx = valuetable[cs.ocap[cs.cap].idx] -- value to select
-- no values?
if idx == 0 then
nextcap(cs); -- skip entire capture
return 0; -- no value produced
else
local subout = { outindex = 0, out = {} }
local n = pushnestedvalues(cs, false, subout, valuetable)
-- invalid index?
if n < idx then
error(("no capture '%d'"):format(idx), 0)
else
out.outindex = out.outindex + 1
out.out[out.outindex] = subout.out[idx] -- get selected capture
return 1;
end
end
end
-- Calls a runtime capture. Returns number of captures removed by
-- the call, including the initial Cgroup. (Captures to be added are
-- on the Lua stack.)
local function runtimecap(cs, close, s, out, valuetable)
local open = findopen(cs, close)
assert(cs.ocap[open].kind == Cgroup)
cs.ocap[close].kind = Cclose; -- closes the group
cs.ocap[close].s = s;
cs.cap = open;
local fce = valuetable[cs.ocap[cs.cap].idx] -- push function to be called
local subout = { outindex = 0, out = {} }
local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures
local count, ret = retcount(fce(cs.s or cs.stream, s, unpack(subout.out, 1, n))) -- call dynamic function
for i = 1, count do
out.outindex = out.outindex + 1
out.out[out.outindex] = ret[i]
end
return close - open -- number of captures of all kinds removed
end
-- Collect values from current capture into array 'cps'. Current
-- capture must be Cstring (first call) or Csimple (recursive calls).
-- (In first call, fills %0 with whole match for Cstring.)
-- Returns number of elements in the array that were filled.
local function getstrcaps(cs, cps, n)
local k = n
n = n + 1
cps[k + 1].isstring = true; -- get string value
cps[k + 1].startstr = cs.ocap[cs.cap].s; -- starts here
cs.cap = cs.cap + 1
-- nested captures?
if cs.ocap[cs.cap - 1].siz == 0 then
-- traverse them
while cs.ocap[cs.cap].kind ~= Cclose do
-- too many captures?
if n >= MAXSTRCAPS then
nextcap(cs); -- skip extra captures (will not need them)
elseif cs.ocap[cs.cap].kind == Csimple then
-- string?
n = getstrcaps(cs, cps, n); -- put info. into array
else
cps[n + 1].isstring = false; -- not a string
cps[n + 1].origcap = cs.cap; -- keep original capture
nextcap(cs);
n = n + 1;
end
end
cs.cap = cs.cap + 1 -- skip close
end
cps[k + 1].endstr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1 -- ends here
return n;
end
-- add next capture value (which should be a string) to buffer 'b'
-- String capture: add result to buffer 'b' (instead of pushing
-- it into the stack)
local function stringcap(cs, b, valuetable)
local cps = {}
for i = 1, MAXSTRCAPS do
cps[#cps + 1] = {}
end
local fmt = valuetable[cs.ocap[cs.cap].idx]
local n = getstrcaps(cs, cps, 0) - 1; -- collect nested captures
local i = 1
-- traverse them
while i <= #fmt do
local c = fmt:sub(i, i)
-- not an escape?
if c ~= '%' then
b[#b + 1] = c -- add it to buffer
elseif fmt:sub(i + 1, i + 1) < '0' or fmt:sub(i + 1, i + 1) > '9' then
-- not followed by a digit?
i = i + 1
b[#b + 1] = fmt:sub(i, i)
else
i = i + 1
local l = fmt:sub(i, i) - '0'; -- capture index
if l > n then
error(("invalid capture index (%d)"):format(l), 0)
elseif cps[l + 1].isstring then
b[#b + 1] = cs.s and cs.s:sub(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) or
cs.stream(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1)
else
local curr = cs.cap;
cs.cap = cps[l + 1].origcap; -- go back to evaluate that nested capture
if not addonestring(cs, b, "capture", valuetable) then
error(("no values in capture index %d"):format(l), 0)
end
cs.cap = curr; -- continue from where it stopped
end
end
i = i + 1
end
end
-- Substitution capture: add result to buffer 'b'
local function substcap(cs, b, valuetable)
local curr = cs.ocap[cs.cap].s;
-- no nested captures?
if cs.ocap[cs.cap].siz ~= 0 then
-- keep original text
b[#b + 1] = cs.s and cs.s:sub(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) or
cs.stream(curr, cs.ocap[cs.cap].siz - 1 + curr - 1)
else
cs.cap = cs.cap + 1 -- skip open entry
-- traverse nested captures
while cs.ocap[cs.cap].kind ~= Cclose do
local next = cs.ocap[cs.cap].s;
b[#b + 1] = cs.s and cs.s:sub(curr, next - curr + curr - 1) or
cs.stream(curr, next - curr + curr - 1) -- add text up to capture
if addonestring(cs, b, "replacement", valuetable) then
curr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1; -- continue after match
else
-- no capture value
curr = next; -- keep original text in final result
end
end
b[#b + 1] = cs.s and cs.s:sub(curr, curr + cs.ocap[cs.cap].s - curr - 1) or
cs.stream(curr, curr + cs.ocap[cs.cap].s - curr - 1) -- add last piece of text
end
cs.cap = cs.cap + 1 -- go to next capture
end
-- Evaluates a capture and adds its first value to buffer 'b'; returns
-- whether there was a value
function addonestring(cs, b, what, valuetable)
local tag = cs.ocap[cs.cap].kind
if tag == Cstring then
stringcap(cs, b, valuetable); -- add capture directly to buffer
return 1
elseif tag == Csubst then
substcap(cs, b, valuetable); -- add capture directly to buffer
return 1
else
local subout = { outindex = 0, out = {} }
local n = pushcapture(cs, subout, valuetable);
if n > 0 then
if type(subout.out[1]) ~= 'string' and type(subout.out[1]) ~= 'number' then
error(("invalid %s value (a %s)"):format(what, type(subout.out[1])), 0)
end
b[#b + 1] = subout.out[1]
return n
end
end
end
-- Push all values of the current capture into the stack; returns
-- number of values pushed
function pushcapture(cs, out, valuetable)
local type = cs.ocap[cs.cap].kind
if type == Cposition then
out.outindex = out.outindex + 1
out.out[out.outindex] = cs.ocap[cs.cap].s
cs.cap = cs.cap + 1;
return 1;
elseif type == Cconst then
out.outindex = out.outindex + 1
out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx]
cs.cap = cs.cap + 1
return 1;
elseif type == Carg then
local arg = valuetable[cs.ocap[cs.cap].idx]
cs.cap = cs.cap + 1
if arg > cs.ptopcount then
error(("reference to absent extra argument #%d"):format(arg), 0)
end
out.outindex = out.outindex + 1
out.out[out.outindex] = cs.ptop[arg]
return 1;
elseif type == Csimple then
local k = pushnestedvalues(cs, true, out, valuetable)
local index = out.outindex
table.insert(out.out, index - k + 1, out.out[index])
out[index + 1] = nil
return k;
elseif type == Cruntime then
out.outindex = out.outindex + 1
out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx]
cs.cap = cs.cap + 1;
return 1;
elseif type == Cstring then
local b = {}
stringcap(cs, b, valuetable)
out.outindex = out.outindex + 1
out.out[out.outindex] = table.concat(b)
return 1;
elseif type == Csubst then
local b = {}
substcap(cs, b, valuetable);
out.outindex = out.outindex + 1
out.out[out.outindex] = table.concat(b)
return 1;
elseif type == Cgroup then
-- anonymous group?
if cs.ocap[cs.cap].idx == 0 then
return pushnestedvalues(cs, false, out, valuetable); -- add all nested values
else
-- named group: add no values
nextcap(cs); -- skip capture
return 0
end
elseif type == Cbackref then
return backrefcap(cs, out, valuetable)
elseif type == Ctable then
return tablecap(cs, out, valuetable)
elseif type == Cfunction then
return functioncap(cs, out, valuetable)
elseif type == Cnum then
return numcap(cs, out, valuetable)
elseif type == Cquery then
return querycap(cs, out, valuetable)
elseif type == Cfold then
return foldcap(cs, out, valuetable)
else
assert(false)
end
end
-- Prepare a CapState structure and traverse the entire list of
-- captures in the stack pushing its results. 's' is the subject
-- string, 'r' is the final position of the match, and 'ptop'
-- the index in the stack where some useful values were pushed.
-- Returns the number of results pushed. (If the list produces no
-- results, push the final position of the match.)
local function getcaptures(capture, s, stream, r, valuetable, ...)
local n = 0;
local cs = { cap = 0 }
local out = { outindex = 0; out = {} }
-- is there any capture?
if capture[cs.cap].kind ~= Cclose then
cs.ocap = capture
cs.s = s;
cs.stream = stream
cs.ptopcount, cs.ptop = retcount(...)
repeat -- collect their values
n = n + pushcapture(cs, out, valuetable)
until cs.ocap[cs.cap].kind == Cclose
end
-- no capture values?
if n == 0 then
if not r then
return
else
return r
end
end
assert(out.outindex < 7998, "(too many captures)")
return unpack(out.out, 1, out.outindex)
end
local function getcapturesruntime(capture, s, stream, notdelete, min, max, captop, valuetable, ...)
local n = 0;
local cs = { cap = min }
local out = { outindex = 0; out = {} }
cs.ocap = capture
cs.s = s
cs.stream = stream
cs.ptopcount, cs.ptop = retcount(...)
local start = 0
repeat -- collect their values
if not checknextcap(cs, max) then break end
local notdelete = notdelete or capture[cs.cap].kind == Cgroup and capture[cs.cap].idx ~= 0 and capture[cs.cap].candelete == 0
pushcapture(cs, out, valuetable)
if notdelete then
start = cs.cap
else
n = n + cs.cap - start
for i = 0, captop - cs.cap - 1 do
ffi.copy(capture + start + i, capture + cs.cap + i, ffi.sizeof('CAPTURE'))
end
max = max - (cs.cap - start)
captop = captop - (cs.cap - start)
cs.cap = start
end
until cs.cap == max
assert(out.outindex < 7998, "(too many captures)")
return n, out.out, out.outindex
end
return {
getcaptures = getcaptures,
runtimecap = runtimecap,
getcapturesruntime = getcapturesruntime,
}