diff --git a/changelog.md b/changelog.md index 02a8feb..342626b 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,7 @@ +# 0.21.1.0 + +Alignement sur SWADE v1.1.X et traductions de clés manquantes + # 0.21.0.0 Alignement sur SWADE v1.1.X diff --git a/fr.json b/fr.json index 427b408..d530b10 100644 --- a/fr.json +++ b/fr.json @@ -18,6 +18,8 @@ "ITEM.TypeSkill": "Compétence", "ITEM.TypePower": "Pouvoir", "ITEM.TypeAbility": "Capacité", + "CARD.TypeAdventure":"Cartes d'aventure", + "CARD.TypePoker":"Cartes de Poker", "SWADE.Name": "Nom", "SWADE.Tough": "Résistance", "SWADE.Race": "Espèce", @@ -306,6 +308,39 @@ "SWADE.Crew": "Équipage requis", "SWADE.Passengers": "Passagers", + "SWADE.ActAfterCurrentCombatant":"Agir après le combattant actuel", + "SWADE.ActNow": "Agir maintenant", + "SWADE.ActionDeckPresetDark":"Cartes d'action (Sombre)", + "SWADE.ActionDeckPresetLight":"Cartes d'action (Clair)", + "SWADE.ActionDeckReset":"Réinitialiser les cartes d'action", + "SWADE.ActionDeckResetNotification":"Réinitialise les cartes d'action", + "SWADE.ActivateArcaneDevice":"Activer l'objet", + "SWADE.AddTokenFollowers":"Ajouter les tokens sélectionnés comme suivants", + "SWADE.AnimalSmartsMarker":"A", + "SWADE.ArcaneDevice":"Objet Arcanique", + "SWADE.ArcaneSkill":"Compétence d'arcane", + "SWADE.Archetype":"Archétype", + "SWADE.ArchetypeAbilities":"Capacités d'archétype", + "SWADE.ButtonReset":"Ré-initialisation", + "SWADE.ButtonSubmit":"Soumettre", + "SWADE.ConvictionActivate":"Faire appel à leur conviction!", + "SWADE.CoreSkill":"Compétence de base", + "SWADE.CoreSkills":"Compétences de base", + "SWADE.CoreSkillsDesc":" >-", + "SWADE.DrawInit":"Lancer l'initiative", + "SWADE.Follow":"Suivre {name}", + "SWADE.GroupByName":"Grouper par nom", + "SWADE.Hold":"Conserver", + "SWADE.LoseTurn":"Toggle Lose Turn", + "SWADE.MakeGroupLeader":"Faire un Chef de groupe", + "SWADE.PPCost":"Coût en PP", + "SWADE.RemoveGroupLeader":"Supprimer le chef de groupe", +
"SWADE.SelectColor":"Sélectionner une couleur", + "SWADE.SetGroupColor":"Choisir une couleur de groupe", + "SWADE.Unfollow":"Arrêter de suivre {name}", + "SWADE.WeaponsAndArmor":"Armes & Armures", + "SWADE.WildCard":"Joker", + "SWADE.String": "Texte", "SWADE.Number": "Nombre", "SWADE.Checkbox": "Checkbox", @@ -465,6 +500,7 @@ "SWADE.Background": "Background", "SWADE.ActiveEffects.Add": "Ajouter un Active Effect", "SWADE.ActiveEffects.Source": "Source", + "SWADE.Advances.Number": "Numéro d'avancement", "Actor": "Acteur", "Vehicles": "Véhicules", @@ -506,5 +542,6 @@ "6. Bestiary": "6. Bestiaire", "7. Game Mastering": "6. Conseils aux Maîtres de Jeu", "Character Creation": "Création de Personnages", - "The Adventure toolkit": "Boite à Outils des Aventures" + "The Adventure toolkit": "Boite à Outils des Aventures", + "name":"Portée" } diff --git a/module.json b/module.json index 600ed25..1a67696 100644 --- a/module.json +++ b/module.json @@ -2,7 +2,7 @@ "name": "swade-fr", "title": "SWADE - Traduction française", "description": "Ajoute le français (FRANCE) au système SWADE.
Une traduction Babele des compendiums est inclue mais optionnelle. (Encore en test, à utiliser à vos risques et périls !)
*** Join the official Discord server: Official Discord
*** Rejoignez la communauté Francophone: Discord francophone
", - "version": "0.21.0.0", + "version": "0.21.1.0", "minimumCoreVersion" : "0.7.9", "compatibleCoreVersion" : "9", "author": "BoboursToutCool, Gronyon, Kyane, LeRatierBretonnien, Sasmira, U~man,X.O. de Vorcen", @@ -65,5 +65,5 @@ "url": "https://www.uberwald.me/gitea/public//foundryvtt-swade-fr", "manifest": "https://www.uberwald.me/gitea/public/foundryvtt-swade-fr/raw/branch/master/module.json", - "download": "https://www.uberwald.me/gitea/public/foundryvtt-swade-fr/archive/foundryvtt-swade-fr-0.21.0.0.zip" + "download": "https://www.uberwald.me/gitea/public/foundryvtt-swade-fr/archive/foundryvtt-swade-fr-0.21.1.0.zip" } \ No newline at end of file
diff --git a/tools/detect_missing_strings.lua b/tools/detect_missing_strings.lua
new file mode 100644
index 0000000..34f22a0
--- /dev/null
+++ b/tools/detect_missing_strings.lua
@@ -0,0 +1,59 @@
+-- Prints the translation keys found in the SWADE English language file
+-- (en.yml) that are absent from fr.json, as JSON lines ready to paste.
+-- Paths are relative to the current directory (expected to be tools/).
+package.path = package.path .. ";luajson/?.lua"
+local JSON = require "json"
+
+local EN_YAML_PATH = "../../swade/src/lang/en.yml"
+local FR_JSON_PATH = "../fr.json"
+
+-- Open a file for reading; abort with a readable message on failure
+-- instead of crashing later on a nil file handle.
+local function open_for_reading(path)
+  local fp, err = io.open(path, "r")
+  if not fp then
+    error(("cannot open '%s': %s"):format(path, tostring(err)), 0)
+  end
+  return fp
+end
+
+-- Escape backslashes and double quotes so the output stays valid JSON.
+local function json_escape(s)
+  return (s:gsub('[\\"]', '\\%0'))
+end
+
+-- Line-based scan of "key: value" pairs. This is not a YAML parser:
+-- nested keys are reported under their leaf name only.
+local entags = {}
+local fp = open_for_reading(EN_YAML_PATH)
+for line in fp:lines() do
+  -- Non-empty key, then the rest of the line with surrounding blanks
+  -- trimmed (the value used to keep the blank that follows the colon).
+  local key, value = line:match("^%s*([%w%._%-]+):%s*(.-)%s*$")
+  if key then
+    if value:match("^[>|][%-%+]?$") then
+      -- YAML block scalar marker: the real text is on the following lines.
+      io.stderr:write("multi-line value, copy it by hand: ", key, "\n")
+      value = ""
+    end
+    entags[key] = value
+  end
+end
+fp:close()
+
+fp = open_for_reading(FR_JSON_PATH)
+local frtags = JSON.decode(fp:read("*a"))
+fp:close()
+
+local missing = {}
+for tag, value in pairs(entags) do
+  if frtags[tag] == nil then
+    missing[#missing + 1] = { tag = tag, value = value }
+  end
+end
+table.sort(missing, function(a, b) return a.tag < b.tag end)
+
+-- ipairs walks the sorted array in order; pairs gives no such guarantee.
+for _, def in ipairs(missing) do
+  print(('"%s": "%s",'):format(json_escape(def.tag), json_escape(def.value)))
+end
diff --git a/tools/lpeg/lpcap.lua b/tools/lpeg/lpcap.lua new file mode 100644 index 0000000..06fbee8 --- /dev/null +++ b/tools/lpeg/lpcap.lua @@ -0,0 +1,625 @@ +--[[ +LPEGLJ +lpcap.lua +Capture functions +Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] +local ffi = require "ffi" + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local MAXSTRCAPS = 10 + +local pushcapture +local addonestring + + +-- Goes back in a list of captures looking for an open capture +-- corresponding to a close + +local function findopen(cs, index) + local n = 0; -- number of closes waiting an open + while true do + index = index - 1 + if cs.ocap[index].kind == Cclose then + n = n + 1 -- one more open to skip + elseif cs.ocap[index].siz == 0 then + if n == 0 then + return index + end + n = n - 1 + end + end +end + + +local function checknextcap(cs, captop) + local cap = cs.cap; + -- not a single capture? ((cap)->siz != 0) + if cs.ocap[cap].siz == 0 then + local n = 0; -- number of opens waiting a close + -- look for corresponding close + while true do + cap = cap + 1 + if cap > captop then return end + if cs.ocap[cap].kind == Cclose then + n = n - 1 + if n + 1 == 0 then + break; + end + elseif cs.ocap[cap].siz == 0 then + n = n + 1 + end + end + end + cap = cap + 1; -- + 1 to skip last close (or entire single capture) + if cap > captop then return end + return true +end + + +-- Go to the next capture + +local function nextcap(cs) + local cap = cs.cap; + -- not a single capture? 
((cap)->siz != 0) + if cs.ocap[cap].siz == 0 then + local n = 0; -- number of opens waiting a close + -- look for corresponding close + while true do + cap = cap + 1 + if cs.ocap[cap].kind == Cclose then + n = n - 1 + if n + 1 == 0 then + break; + end + elseif cs.ocap[cap].siz == 0 then + n = n + 1 + end + end + end + cs.cap = cap + 1; -- + 1 to skip last close (or entire single capture) +end + + +-- Push on the Lua stack all values generated by nested captures inside +-- the current capture. Returns number of values pushed. 'addextra' +-- makes it push the entire match after all captured values. The +-- entire match is pushed also if there are no other nested values, +-- so the function never returns zero. + +local function pushnestedvalues(cs, addextra, out, valuetable) + local co = cs.cap + cs.cap = cs.cap + 1 + -- no nested captures? + if cs.ocap[cs.cap - 1].siz ~= 0 then + local st = cs.ocap[co].s + local l = cs.ocap[co].siz - 1 + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1) + return 1; -- that is it + else + local n = 0; + while cs.ocap[cs.cap].kind ~= Cclose do -- repeat for all nested patterns + n = n + pushcapture(cs, out, valuetable); + end + -- need extra? + if addextra or n == 0 then + local st = cs.ocap[co].s + local l = cs.ocap[cs.cap].s - cs.ocap[co].s + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1) + n = n + 1 + end + cs.cap = cs.cap + 1 -- skip close entry + return n; + end +end + + +-- Push only the first value generated by nested captures + +local function pushonenestedvalue(cs, out, valuetable) + local n = pushnestedvalues(cs, false, out, valuetable) + for i = n, 2, -1 do + out.out[out.outindex] = nil + out.outindex = out.outindex - 1 + end +end + + +-- Try to find a named group capture with the name given at the top of +-- the stack; goes backward from 'cap'. 
+ +local function findback(cs, cap, name, valuetable) + -- repeat until end of list + while cap > 0 do + cap = cap - 1 + local continue + if cs.ocap[cap].kind == Cclose then + cap = findopen(cs, cap); -- skip nested captures + elseif cs.ocap[cap].siz == 0 then + continue = true -- opening an enclosing capture: skip and get previous + end + if not continue and cs.ocap[cap].kind == Cgroup and cs.ocap[cap].idx ~= 0 then + local gname = valuetable[cs.ocap[cap].idx] -- get group name + -- right group? + if name == gname then + return cap; + end + end + end + error(("back reference '%s' not found"):format(name), 0) +end + + +-- Back-reference capture. Return number of values pushed. + +local function backrefcap(cs, out, valuetable) + local curr = cs.cap; + local name = valuetable[cs.ocap[cs.cap].idx] -- reference name + cs.cap = findback(cs, curr, name, valuetable) -- find corresponding group + local n = pushnestedvalues(cs, false, out, valuetable); -- push group's values + cs.cap = curr + 1; + return n; +end + + +-- Table capture: creates a new table and populates it with nested +-- captures. + +local function tablecap(cs, out, valuetable) + local n = 0; + local t = {} + cs.cap = cs.cap + 1 + -- table is empty + if cs.ocap[cs.cap - 1].siz == 0 then + while cs.ocap[cs.cap].kind ~= Cclose do + local subout = { outindex = 0, out = {} } + -- named group? 
+ if cs.ocap[cs.cap].kind == Cgroup and cs.ocap[cs.cap].idx ~= 0 then + local groupname = valuetable[cs.ocap[cs.cap].idx] -- push group name + pushonenestedvalue(cs, subout, valuetable) + t[groupname] = subout.out[1] + else + -- not a named group + local k = pushcapture(cs, subout, valuetable) + -- store all values into table + for i = 1, subout.outindex do + t[i + n] = subout.out[i] + end + n = n + k; + end + end + cs.cap = cs.cap + 1 -- skip close entry + end + out.outindex = out.outindex + 1 + out.out[out.outindex] = t + return 1; -- number of values pushed (only the table) +end + + +-- Table-query capture + +local function querycap(cs, out, valuetable) + local table = valuetable[cs.ocap[cs.cap].idx] + local subout = { outindex = 0, out = {} } + pushonenestedvalue(cs, subout, valuetable) -- get nested capture + -- query cap. value at table + if table[subout.out[1]] ~= nil then + out.outindex = out.outindex + 1 + out.out[out.outindex] = table[subout.out[1]] + return 1 + end + return 0 +end + + +-- Fold capture + +local function foldcap(cs, out, valuetable) + local fce = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + -- no nested captures? + -- or no nested captures (large subject)? + if cs.ocap[cs.cap - 1].siz ~= 0 or + cs.ocap[cs.cap].kind == Cclose then + error("no initial value for fold capture", 0); + end + local subout = { outindex = 0; out = {} } + local n = pushcapture(cs, subout, valuetable) -- nested captures with no values? 
+ if n == 0 then + error("no initial value for fold capture", 0); + end + local acumulator = subout.out[1] -- leave only one result for accumulator + while cs.ocap[cs.cap].kind ~= Cclose do + local subout = { outindex = 0; out = {} } + n = pushcapture(cs, subout, valuetable); -- get next capture's values + acumulator = fce(acumulator, unpack(subout.out, 1, subout.outindex)) -- call folding function + end + cs.cap = cs.cap + 1; -- skip close entry + out.outindex = out.outindex + 1 + out.out[out.outindex] = acumulator + return 1; -- only accumulator left on the stack +end + + +local function retcount(...) + return select('#', ...), { ... } +end + + +-- Function capture + +local function functioncap(cs, out, valuetable) + local fce = valuetable[cs.ocap[cs.cap].idx] -- push function + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures + local count, ret = retcount(fce(unpack(subout.out, 1, n))) -- call function + for i = 1, count do + out.outindex = out.outindex + 1 + out.out[out.outindex] = ret[i] + end + return count +end + + +-- Select capture + +local function numcap(cs, out, valuetable) + local idx = valuetable[cs.ocap[cs.cap].idx] -- value to select + -- no values? + if idx == 0 then + nextcap(cs); -- skip entire capture + return 0; -- no value produced + else + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable) + -- invalid index? + if n < idx then + error(("no capture '%d'"):format(idx), 0) + else + out.outindex = out.outindex + 1 + out.out[out.outindex] = subout.out[idx] -- get selected capture + return 1; + end + end +end + + +-- Calls a runtime capture. Returns number of captures removed by +-- the call, including the initial Cgroup. (Captures to be added are +-- on the Lua stack.) 
+ +local function runtimecap(cs, close, s, out, valuetable) + local open = findopen(cs, close) + assert(cs.ocap[open].kind == Cgroup) + cs.ocap[close].kind = Cclose; -- closes the group + cs.ocap[close].s = s; + cs.cap = open; + local fce = valuetable[cs.ocap[cs.cap].idx] -- push function to be called + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures + local count, ret = retcount(fce(cs.s or cs.stream, s, unpack(subout.out, 1, n))) -- call dynamic function + for i = 1, count do + out.outindex = out.outindex + 1 + out.out[out.outindex] = ret[i] + end + return close - open -- number of captures of all kinds removed +end + +-- Collect values from current capture into array 'cps'. Current +-- capture must be Cstring (first call) or Csimple (recursive calls). +-- (In first call, fills %0 with whole match for Cstring.) +-- Returns number of elements in the array that were filled. + +local function getstrcaps(cs, cps, n) + local k = n + n = n + 1 + cps[k + 1].isstring = true; -- get string value + cps[k + 1].startstr = cs.ocap[cs.cap].s; -- starts here + cs.cap = cs.cap + 1 + -- nested captures? + if cs.ocap[cs.cap - 1].siz == 0 then + -- traverse them + while cs.ocap[cs.cap].kind ~= Cclose do + -- too many captures? + if n >= MAXSTRCAPS then + nextcap(cs); -- skip extra captures (will not need them) + elseif cs.ocap[cs.cap].kind == Csimple then + -- string? + n = getstrcaps(cs, cps, n); -- put info. 
into array + else + cps[n + 1].isstring = false; -- not a string + cps[n + 1].origcap = cs.cap; -- keep original capture + nextcap(cs); + n = n + 1; + end + end + cs.cap = cs.cap + 1 -- skip close + end + cps[k + 1].endstr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1 -- ends here + return n; +end + + +-- add next capture value (which should be a string) to buffer 'b' + +-- String capture: add result to buffer 'b' (instead of pushing +-- it into the stack) + +local function stringcap(cs, b, valuetable) + local cps = {} + for i = 1, MAXSTRCAPS do + cps[#cps + 1] = {} + end + local fmt = valuetable[cs.ocap[cs.cap].idx] + local n = getstrcaps(cs, cps, 0) - 1; -- collect nested captures + local i = 1 + -- traverse them + while i <= #fmt do + local c = fmt:sub(i, i) + -- not an escape? + if c ~= '%' then + b[#b + 1] = c -- add it to buffer + elseif fmt:sub(i + 1, i + 1) < '0' or fmt:sub(i + 1, i + 1) > '9' then + -- not followed by a digit? + i = i + 1 + b[#b + 1] = fmt:sub(i, i) + else + i = i + 1 + local l = fmt:sub(i, i) - '0'; -- capture index + if l > n then + error(("invalid capture index (%d)"):format(l), 0) + elseif cps[l + 1].isstring then + b[#b + 1] = cs.s and cs.s:sub(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) or + cs.stream(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) + else + local curr = cs.cap; + cs.cap = cps[l + 1].origcap; -- go back to evaluate that nested capture + if not addonestring(cs, b, "capture", valuetable) then + error(("no values in capture index %d"):format(l), 0) + end + cs.cap = curr; -- continue from where it stopped + end + end + i = i + 1 + end +end + + +-- Substitution capture: add result to buffer 'b' + +local function substcap(cs, b, valuetable) + local curr = cs.ocap[cs.cap].s; + -- no nested captures? 
+ if cs.ocap[cs.cap].siz ~= 0 then + -- keep original text + b[#b + 1] = cs.s and cs.s:sub(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) or + cs.stream(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) + else + cs.cap = cs.cap + 1 -- skip open entry + -- traverse nested captures + while cs.ocap[cs.cap].kind ~= Cclose do + local next = cs.ocap[cs.cap].s; + b[#b + 1] = cs.s and cs.s:sub(curr, next - curr + curr - 1) or + cs.stream(curr, next - curr + curr - 1) -- add text up to capture + if addonestring(cs, b, "replacement", valuetable) then + curr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1; -- continue after match + else + -- no capture value + curr = next; -- keep original text in final result + end + end + b[#b + 1] = cs.s and cs.s:sub(curr, curr + cs.ocap[cs.cap].s - curr - 1) or + cs.stream(curr, curr + cs.ocap[cs.cap].s - curr - 1) -- add last piece of text + end + cs.cap = cs.cap + 1 -- go to next capture +end + + +-- Evaluates a capture and adds its first value to buffer 'b'; returns +-- whether there was a value + +function addonestring(cs, b, what, valuetable) + local tag = cs.ocap[cs.cap].kind + if tag == Cstring then + stringcap(cs, b, valuetable); -- add capture directly to buffer + return 1 + elseif tag == Csubst then + substcap(cs, b, valuetable); -- add capture directly to buffer + return 1 + else + local subout = { outindex = 0, out = {} } + local n = pushcapture(cs, subout, valuetable); + if n > 0 then + if type(subout.out[1]) ~= 'string' and type(subout.out[1]) ~= 'number' then + error(("invalid %s value (a %s)"):format(what, type(subout.out[1])), 0) + end + b[#b + 1] = subout.out[1] + return n + end + end +end + + +-- Push all values of the current capture into the stack; returns +-- number of values pushed + +function pushcapture(cs, out, valuetable) + local type = cs.ocap[cs.cap].kind + if type == Cposition then + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.ocap[cs.cap].s + cs.cap = cs.cap + 1; + return 1; + elseif type == Cconst 
then + out.outindex = out.outindex + 1 + out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + return 1; + elseif type == Carg then + local arg = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + if arg > cs.ptopcount then + error(("reference to absent extra argument #%d"):format(arg), 0) + end + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.ptop[arg] + return 1; + elseif type == Csimple then + local k = pushnestedvalues(cs, true, out, valuetable) + local index = out.outindex + table.insert(out.out, index - k + 1, out.out[index]) + out[index + 1] = nil + return k; + elseif type == Cruntime then + out.outindex = out.outindex + 1 + out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1; + return 1; + elseif type == Cstring then + local b = {} + stringcap(cs, b, valuetable) + out.outindex = out.outindex + 1 + out.out[out.outindex] = table.concat(b) + return 1; + elseif type == Csubst then + local b = {} + substcap(cs, b, valuetable); + out.outindex = out.outindex + 1 + out.out[out.outindex] = table.concat(b) + return 1; + elseif type == Cgroup then + -- anonymous group? + if cs.ocap[cs.cap].idx == 0 then + return pushnestedvalues(cs, false, out, valuetable); -- add all nested values + else + -- named group: add no values + nextcap(cs); -- skip capture + return 0 + end + elseif type == Cbackref then + return backrefcap(cs, out, valuetable) + elseif type == Ctable then + return tablecap(cs, out, valuetable) + elseif type == Cfunction then + return functioncap(cs, out, valuetable) + elseif type == Cnum then + return numcap(cs, out, valuetable) + elseif type == Cquery then + return querycap(cs, out, valuetable) + elseif type == Cfold then + return foldcap(cs, out, valuetable) + else + assert(false) + end +end + + +-- Prepare a CapState structure and traverse the entire list of +-- captures in the stack pushing its results. 
's' is the subject +-- string, 'r' is the final position of the match, and 'ptop' +-- the index in the stack where some useful values were pushed. +-- Returns the number of results pushed. (If the list produces no +-- results, push the final position of the match.) + +local function getcaptures(capture, s, stream, r, valuetable, ...) + local n = 0; + local cs = { cap = 0 } + local out = { outindex = 0; out = {} } + -- is there any capture? + if capture[cs.cap].kind ~= Cclose then + cs.ocap = capture + cs.s = s; + cs.stream = stream + cs.ptopcount, cs.ptop = retcount(...) + repeat -- collect their values + n = n + pushcapture(cs, out, valuetable) + until cs.ocap[cs.cap].kind == Cclose + end + -- no capture values? + if n == 0 then + if not r then + return + else + return r + end + end + assert(out.outindex < 7998, "(too many captures)") + return unpack(out.out, 1, out.outindex) +end + +local function getcapturesruntime(capture, s, stream, notdelete, min, max, captop, valuetable, ...) + local n = 0; + local cs = { cap = min } + local out = { outindex = 0; out = {} } + cs.ocap = capture + cs.s = s + cs.stream = stream + cs.ptopcount, cs.ptop = retcount(...) 
+ local start = 0 + repeat -- collect their values + if not checknextcap(cs, max) then break end + local notdelete = notdelete or capture[cs.cap].kind == Cgroup and capture[cs.cap].idx ~= 0 and capture[cs.cap].candelete == 0 + pushcapture(cs, out, valuetable) + if notdelete then + start = cs.cap + else + n = n + cs.cap - start + for i = 0, captop - cs.cap - 1 do + ffi.copy(capture + start + i, capture + cs.cap + i, ffi.sizeof('CAPTURE')) + end + max = max - (cs.cap - start) + captop = captop - (cs.cap - start) + cs.cap = start + end + until cs.cap == max + assert(out.outindex < 7998, "(too many captures)") + return n, out.out, out.outindex +end + +return { + getcaptures = getcaptures, + runtimecap = runtimecap, + getcapturesruntime = getcapturesruntime, +} + diff --git a/tools/lpeg/lpcode.lua b/tools/lpeg/lpcode.lua new file mode 100644 index 0000000..bd0639f --- /dev/null +++ b/tools/lpeg/lpcode.lua @@ -0,0 +1,1057 @@ +--[[ +LPEGLJ +lpcode.lua +Generating code from tree +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] +local ffi = require "ffi" +require "lpvm" + +local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump 
to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + + +local PEnullable = 0 +local PEnofail = 1 +local RuleLR = 0x10000 +local NOINST = -2 + + +local MAXBEHINDPREDICATE = 255 +local MAXRULES = 200 +local MAXOFF = 0xF + +-- number of siblings for each tree +local numsiblings = { + 0, 0, 0, -- char, set, any + 0, 0, -- true, false + 1, -- rep + 2, 2, -- seq, choice + 1, 1, -- not, and + 0, 0, 2, 1, -- call, opencall, rule, grammar + 1, -- behind + 1, 1 -- capture, runtime capture +} + + +local patternelement = ffi.typeof('PATTERN_ELEMENT') +local pattern = ffi.typeof('PATTERN') +local settype = ffi.typeof('int32_t[8]') +local fullset = settype(-1, -1, -1, -1, -1, -1, -1, -1) + +-- {====================================================== +-- Analysis and some optimizations +-- ======================================================= + +local codegen + + +-- Check whether a charset is empty (IFail), singleton (IChar), +-- full (IAny), or none of those (ISet). + +local function charsettype(cs) + local count = 0; + local candidate = -1; -- candidate position for a char + for i = 0, 8 - 1 do + local b = cs[i]; + if b == 0 then + if count > 1 then + return ISet; -- else set is still empty + end + elseif b == -1 then + if count < (i * 32) then + return ISet; + else + count = count + 32; -- set is still full + end + -- byte has only one bit? 
+ elseif band(b, (b - 1)) == 0 then + if count > 0 then + return ISet; -- set is neither full nor empty + -- set has only one char till now; track it + else + count = count + 1; + candidate = i; + end + else + return ISet; -- byte is neither empty, full, nor singleton + end + end + if count == 0 then + return IFail, 0 -- empty set + -- singleton; find character bit inside byte + elseif count == 1 then + local b = cs[candidate]; + local c = candidate * 32; + for i = 1, 32 do + if b == 1 then + c = c + i - 1 + break + end + b = rshift(b, 1) + end + return IChar, c + elseif count == 256 then + return IAny, 0 -- full set + else + assert(false) -- should have returned by now + end +end + + +-- A few basic operations on Charsets + +local function cs_complement(cs) + for i = 0, 8 - 1 do + cs[i] = bnot(cs[i]) + end +end + + +local function cs_equal(cs1, cs2) + for i = 0, 8 - 1 do + if cs1[i] ~= cs2[i] then + return + end + end + return true +end + + +-- computes whether sets st1 and st2 are disjoint + +local function cs_disjoint(st1, st2) + for i = 0, 8 - 1 do + if band(st1[i], st2[i]) ~= 0 then + return + end + end + return true +end + + +-- Convert a 'char' pattern (TSet, TChar, TAny) to a charset + +local function tocharset(tree, index, valuetable) + local val = settype() + if tree.p[index].tag == TSet then + ffi.copy(val, valuetable[tree.p[index].val], ffi.sizeof(val)) + return val + elseif tree.p[index].tag == TChar then + local b = tree.p[index].val + -- only one char + -- add that one + val[rshift(b, 5)] = lshift(1, band(b, 31)) + return val + elseif tree.p[index].tag == TAny then + ffi.fill(val, ffi.sizeof(val), 0xff) + return val + end +end + + +-- checks whether a pattern has captures + +local function hascaptures(tree, index) + if tree.p[index].tag == TCapture or tree.p[index].tag == TRunTime then + return true + elseif tree.p[index].tag == TCall then + return hascaptures(tree, index + tree.p[index].ps) + else + local ns = numsiblings[tree.p[index].tag + 1] + if 
ns == 0 then + return + elseif ns == 1 then + return hascaptures(tree, index + 1) + elseif ns == 2 then + if hascaptures(tree, index + 1) then + return true + else + return hascaptures(tree, index + tree.p[index].ps) + end + else + assert(false) + end + end +end + + +-- Checks how a pattern behaves regarding the empty string, +-- in one of two different ways: +-- A pattern is *nullable* if it can match without consuming any character; +-- A pattern is *nofail* if it never fails for any string +-- (including the empty string). +-- The difference is only for predicates; for patterns without +-- predicates, the two properties are equivalent. +-- (With predicates, &'a' is nullable but not nofail. Of course, +-- nofail => nullable.) +-- These functions are all convervative in the following way: +-- p is nullable => nullable(p) +-- nofail(p) => p cannot fail +-- (The function assumes that TOpenCall and TRunTime are not nullable: +-- TOpenCall must be checked again when the grammar is fixed; +-- TRunTime is an arbitrary choice.) 
+ +local function checkaux(tree, pred, index, lrcall) + lrcall = lrcall or {} + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or + tag == TFalse or tag == TOpenCall then + return -- not nullable + elseif tag == TRep or tag == TTrue then + return true -- no fail + elseif tag == TNot or tag == TBehind then + -- can match empty, but may fail + if pred == PEnofail then + return + else + return true -- PEnullable + end + elseif tag == TAnd then + -- can match empty; fail iff body does + if pred == PEnullable then + return true + else + return checkaux(tree, pred, index + 1, lrcall) + end + -- can fail; match empty iff body does + elseif tag == TRunTime then + if pred == PEnofail then + return + else + return checkaux(tree, pred, index + 1, lrcall) + end + elseif tag == TSeq then + if not checkaux(tree, pred, index + 1, lrcall) then + return + else + return checkaux(tree, pred, index + tree.p[index].ps, lrcall) + end + elseif tag == TChoice then + if checkaux(tree, pred, index + tree.p[index].ps, lrcall) then + return true + else + return checkaux(tree, pred, index + 1, lrcall) + end + elseif tag == TCapture or tag == TGrammar or tag == TRule then + return checkaux(tree, pred, index + 1, lrcall) + elseif tag == TCall then + --left recursive rule + if bit.band(tree.p[index].cap, 0xffff) ~= 0 then + local lr = index + tree.p[index].ps + if lrcall[lr] then + return + end + lrcall[lr] = true + end + return checkaux(tree, pred, index + tree.p[index].ps, lrcall) + else + assert(false) + end +end + + +-- number of characters to match a pattern (or -1 if variable) +-- ('count' avoids infinite loops for grammars) + +local function fixedlenx(tree, count, len, index) + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny then + return len + 1; + elseif tag == TFalse or tag == TTrue or tag == TNot or tag == TAnd or tag == TBehind then + return len; + elseif tag == TRep or tag == TRunTime or tag == TOpenCall then + return -1; + 
elseif tag == TCapture or tag == TRule or tag == TGrammar then + return fixedlenx(tree, count, len, index + 1) + elseif tag == TCall then + if count >= MAXRULES then + return -1; -- may be a loop + else + return fixedlenx(tree, count + 1, len, index + tree.p[index].ps) + end + elseif tag == TSeq then + len = fixedlenx(tree, count, len, index + 1) + if (len < 0) then + return -1; + else + return fixedlenx(tree, count, len, index + tree.p[index].ps) + end + elseif tag == TChoice then + local n1 = fixedlenx(tree, count, len, index + 1) + if n1 < 0 then return -1 end + local n2 = fixedlenx(tree, count, len, index + tree.p[index].ps) + if n1 == n2 then + return n1 + else + return -1 + end + else + assert(false) + end +end + + +-- Computes the 'first set' of a pattern. +-- The result is a conservative aproximation: +-- match p ax -> x' for some x ==> a in first(p). +-- match p '' -> '' ==> returns 1. +-- The set 'follow' is the first set of what follows the +-- pattern (full set if nothing follows it) + +local function getfirst(tree, follow, index, valuetable, lrcall) + lrcall = lrcall or {} + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny then + local firstset = tocharset(tree, index, valuetable) + return 0, firstset + elseif tag == TTrue then + local firstset = settype() + ffi.copy(firstset, follow, ffi.sizeof(firstset)) + return 1, firstset + elseif tag == TFalse then + local firstset = settype() + return 0, firstset + elseif tag == TChoice then + local e1, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall) + for i = 0, 8 - 1 do + firstset[i] = bor(firstset[i], csaux[i]) + end + return bor(e1, e2), firstset + elseif tag == TSeq then + if not checkaux(tree, PEnullable, index + 1) then + return getfirst(tree, fullset, index + 1, valuetable, lrcall) + -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) + else + local e2, csaux = getfirst(tree, 
follow, index + tree.p[index].ps, valuetable, lrcall) + local e1, firstset = getfirst(tree, csaux, index + 1, valuetable, lrcall) + if e1 == 0 then -- 'e1' ensures that first can be used + return 0, firstset + -- one of the children has a matchtime? + elseif band(bor(e1, e2), 2) == 2 then + return 2, firstset -- pattern has a matchtime capture + else + return e2, firstset -- else depends on 'e2' + end + end + elseif tag == TRep then + local _, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + for i = 0, 8 - 1 do + firstset[i] = bor(firstset[i], follow[i]) + end + return 1, firstset -- accept the empty string + elseif tag == TCapture or tag == TGrammar or tag == TRule then + return getfirst(tree, follow, index + 1, valuetable, lrcall) + -- function invalidates any follow info. + elseif tag == TRunTime then + local e, firstset = getfirst(tree, fullset, index + 1, valuetable, lrcall) + if e ~= 0 then + return 2, firstset -- function is not "protected"? + else + return 0, firstset -- pattern inside capture ensures first can be used + end + elseif tag == TCall then + -- left recursive rule + if bit.band(tree.p[index].cap, 0xffff) ~= 0 then + local lr = index + tree.p[index].ps + if lrcall[lr] then + return 0, settype() + else + lrcall[lr] = true + end + end + return getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall) + elseif tag == TAnd then + local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + for i = 0, 8 - 1 do + firstset[i] = band(firstset[i], follow[i]) + end + return e, firstset + elseif tag == TNot then + local firstset = tocharset(tree, index + 1, valuetable) + if firstset then + cs_complement(firstset) + return 1, firstset + end + local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + ffi.copy(firstset, follow, ffi.sizeof(firstset)) + return bor(e, 1), firstset -- always can accept the empty string + -- instruction gives no new information + elseif tag == TBehind then + -- call 
'getfirst' to check for math-time captures + local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + ffi.copy(firstset, follow, ffi.sizeof(firstset)) + return bor(e, 1), firstset -- always can accept the empty string + else + assert(false) + end +end + + +-- If it returns true, then pattern can fail only depending on the next +-- character of the subject + +local function headfail(tree, index, lrcall) + lrcall = lrcall or {} + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then + return true + elseif tag == TTrue or tag == TRep or tag == TRunTime or tag == TNot or tag == TBehind then + return + elseif tag == TCapture or tag == TGrammar or tag == TRule or tag == TAnd then + return headfail(tree, index + 1, lrcall) + elseif tag == TCall then + -- left recursive rule + if bit.band(tree.p[index].cap, 0xffff) ~= 0 then + local lr = index + tree.p[index].ps + if lrcall[lr] then + return true + else + lrcall[lr] = true + end + end + return headfail(tree, index + tree.p[index].ps, lrcall) + elseif tag == TSeq then + if not checkaux(tree, PEnofail, index + tree.p[index].ps) then + return + else + return headfail(tree, index + 1, lrcall) + end + elseif tag == TChoice then + if not headfail(tree, index + 1, lrcall) then + return + else + return headfail(tree, index + tree.p[index].ps, lrcall) + end + else + assert(false) + end +end + + +-- Check whether the code generation for the given tree can benefit +-- from a follow set (to avoid computing the follow set when it is +-- not needed) + +local function needfollow(tree, index) + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or tag == TFalse or tag == TTrue or tag == TAnd or tag == TNot or + tag == TRunTime or tag == TGrammar or tag == TCall or tag == TBehind then + return + elseif tag == TChoice or tag == TRep then + return true + elseif tag == TCapture then + return needfollow(tree, index + 1) + elseif tag == TSeq then + 
return needfollow(tree, index + tree.p[index].ps) + else + assert(false) + end +end + +-- ====================================================== + + +-- {====================================================== +-- Code generation +-- ======================================================= + + +-- code generation is recursive; 'opt' indicates that the code is +-- being generated under a 'IChoice' operator jumping to its end. +-- 'tt' points to a previous test protecting this code. 'fl' is +-- the follow set of the pattern. + + +local function addinstruction(code, op, val) + local size = code.size + if size >= code.allocsize then + code:doublesize() + end + code.p[size].code = op + code.p[size].val = val + code.size = size + 1 + return size +end + + +local function setoffset(code, instruction, offset) + code.p[instruction].offset = offset; +end + + +-- Add a capture instruction: +-- 'op' is the capture instruction; 'cap' the capture kind; +-- 'key' the key into ktable; 'aux' is optional offset + +local function addinstcap(code, op, cap, key, aux) + local i = addinstruction(code, op, bor(cap, lshift(aux, 4))) + setoffset(code, i, key) + return i +end + + +local function jumptothere(code, instruction, target) + if instruction >= 0 then + setoffset(code, instruction, target - instruction) + end +end + + +local function jumptohere(code, instruction) + jumptothere(code, instruction, code.size) +end + + +-- Code an IChar instruction, or IAny if there is an equivalent +-- test dominating it + +local function codechar(code, c, tt) + assert(tt ~= -1) + if tt >= 0 and code.p[tt].code == ITestChar and + code.p[tt].val == c then + addinstruction(code, IAny, 0) + else + addinstruction(code, IChar, c) + end +end + + +-- Code an ISet instruction + +local function coderealcharset(code, cs, valuetable) + local ind = #valuetable + 1 + valuetable[ind] = cs + return addinstruction(code, ISet, ind) +end + + +-- code a char set, optimizing unit sets for IChar, "complete" +-- sets for IAny, 
and empty sets for IFail; also use an IAny +-- when instruction is dominated by an equivalent test. + +local function codecharset(code, cs, tt, valuetable) + local op, c = charsettype(cs) + if op == IChar then + codechar(code, c, tt) + elseif op == ISet then + assert(tt ~= -1) + if tt >= 0 and code.p[tt].code == ITestSet and + cs_equal(cs, valuetable[code.p[tt].val]) then + addinstruction(code, IAny, 0) + else + coderealcharset(code, cs, valuetable) + end + else + addinstruction(code, op, c) + end +end + + +-- code a test set, optimizing unit sets for ITestChar, "complete" +-- sets for ITestAny, and empty sets for IJmp (always fails). +-- 'e' is true iff test should accept the empty string. (Test +-- instructions in the current VM never accept the empty string.) + +local function codetestset(code, cs, e, valuetable) + if e ~= 0 then + return NOINST -- no test + else + local pos = code.size + codecharset(code, cs, NOINST, valuetable) + local inst = code.p[pos] + local code = inst.code + if code == IFail then + inst.code = IJmp -- always jump + elseif code == IAny then + inst.code = ITestAny + elseif code == IChar then + inst.code = ITestChar + elseif code == ISet then + inst.code = ITestSet + else + assert(false) + end + return pos + end +end + + +-- Find the final destination of a sequence of jumps + +local function finaltarget(code, i) + while code.p[i].code == IJmp do + i = i + code.p[i].offset + end + return i +end + + +-- final label (after traversing any jumps) + +local function finallabel(code, i) + return finaltarget(code, i + code.p[i].offset) +end + +-- (where n = fixedlen(p))
+
+-- Look-behind: when the node's 'val' is positive, emit an IBehind with that
+-- value, then generate the code of the sub-pattern.
+local function codebehind(code, tree, index, valuetable)
+    local distance = tree.p[index].val
+    if distance > 0 then
+        addinstruction(code, IBehind, distance)
+    end
+    -- the sub-pattern is coded with the full follow set and no dominating test
+    codegen(code, tree, fullset, false, NOINST, index + 1, valuetable)
+end
+
+
+-- Choice; optimizations:
+-- - when p1 is headfail
+-- - when first(p1) and first(p2) are disjoint; then
+-- a character not in first(p1) cannot go to p1, and a character
+-- in first(p1) cannot go to p2 (as it is not in first(p2)).
+-- (The optimization is not valid if p1 accepts the empty string,
+-- as then there is no character at all...)
+-- - when p2 is empty and opt is true; an IPartialCommit can reuse
+-- the Choice already active in the stack.
+
+local function codechoice(code, tree, fl, opt, p1, p2, valuetable)
+ local emptyp2 = tree.p[p2].tag == TTrue
+ local e1, st1 = getfirst(tree, fullset, p1, valuetable)
+ local _, st2 = getfirst(tree, fl, p2, valuetable)
+ if headfail(tree, p1) or (e1 == 0 and cs_disjoint(st1, st2)) then
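+ -- NOTE(review): the rest of codechoice (its branches and closing 'end')
+ -- appears to be missing at this point; restore it from the upstream source.
+
+-- And predicate; optimization: when fixedlen(p) = n, code it as
+-- p; behind n
+-- (valid only when 'p' has no captures)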
+
+local function codeand(code, tree, tt, index, valuetable)
+    -- A body of fixed length (within MAXBEHINDPREDICATE) and without captures
+    -- is matched in place and then rewound with IBehind; any other body is
+    -- run under a choice.
+    local len = fixedlenx(tree, 0, 0, index)
+    local rewindable = len >= 0 and len <= MAXBEHINDPREDICATE
+        and not hascaptures(tree, index)
+    if rewindable then
+        codegen(code, tree, fullset, false, tt, index, valuetable)
+        if len > 0 then
+            addinstruction(code, IBehind, len)
+        end
+        return
+    end
+    -- general form: Choice L1; p; BackCommit L2; L1: Fail; L2:
+    local choice = addinstruction(code, IChoice, 0)
+    codegen(code, tree, fullset, false, tt, index, valuetable)
+    local backcommit = addinstruction(code, IBackCommit, 0)
+    jumptohere(code, choice)
+    addinstruction(code, IFail, 0)
+    jumptohere(code, backcommit)
+end
+
+
+-- Captures: a body of fixed length (at most MAXOFF) that contains no
+-- captures is followed by a single IFullCapture carrying that length;
+-- any other body is bracketed by IOpenCapture ... ICloseCapture.
+
+local function codecapture(code, tree, fl, tt, index, valuetable)
+    local body = index + 1
+    local node = tree.p[index]
+    local len = fixedlenx(tree, 0, 0, body)
+    local single = len >= 0 and len <= MAXOFF and not hascaptures(tree, body)
+    if not single then
+        addinstcap(code, IOpenCapture, node.cap, node.val, 0)
+        codegen(code, tree, fl, false, tt, body, valuetable)
+        addinstcap(code, ICloseCapture, Cclose, 0, 0)
+        return
+    end
+    codegen(code, tree, fl, false, tt, body, valuetable)
+    addinstcap(code, IFullCapture, node.cap, node.val, len)
+end
+
+
+-- Run-time capture: open a Cgroup capture keyed by the node's 'val', code
+-- the body with the full follow set, and close it with ICloseRunTime.
+local function coderuntime(code, tree, tt, index, valuetable)
+    local key = tree.p[index].val
+    addinstcap(code, IOpenCapture, Cgroup, key, 0)
+    codegen(code, tree, fullset, false, tt, index + 1, valuetable)
+    addinstcap(code, ICloseRunTime, Cclose, 0, 0)
+end
+
+
+-- Repetition; optimizations:
+-- * A pattern that is a plain charset is coded as one ISpan instruction.
+-- * A pattern that is headfail, or whose first set is disjoint from the
+--   follow set, needs only a test: a failure inside the repetition would
+--   backtrack just to fail again in what follows, so no choice is needed.
+-- * When 'opt' is true, the repetition reuses the Choice already active
+--   in the stack instead of pushing its own.
+
+local function coderep(code, tree, opt, fl, index, valuetable)
+    local set = tocharset(tree, index, valuetable)
+    if set then
+        -- emit the set, then turn the emitted instruction into a span
+        local pos = coderealcharset(code, set, valuetable)
+        code.p[pos].code = ISpan
+        return
+    end
+
+    local e1, first = getfirst(tree, fullset, index, valuetable)
+    if headfail(tree, index) or (e1 == 0 and cs_disjoint(first, fl)) then
+        -- L1: test (fail(p1)) -> L2; p; jmp L1; L2:
+        local test = codetestset(code, first, 0, valuetable)
+        codegen(code, tree, fullset, false, test, index, valuetable)
+        local back = addinstruction(code, IJmp, 0)
+        jumptohere(code, test)
+        jumptothere(code, back, test)
+        return
+    end
+
+    -- test(fail(p1)) -> L2; choice L2; L1: p; partialcommit L1; L2:
+    -- or (if 'opt'): partialcommit L1; L1: p; partialcommit L1;
+    local test = codetestset(code, first, e1, valuetable)
+    local choice = NOINST
+    if opt then
+        jumptohere(code, addinstruction(code, IPartialCommit, 0))
+    else
+        choice = addinstruction(code, IChoice, 0)
+    end
+    local bodystart = code.size
+    codegen(code, tree, fullset, false, NOINST, index, valuetable)
+    local commit = addinstruction(code, IPartialCommit, 0)
+    jumptothere(code, commit, bodystart)
+    jumptohere(code, choice)
+    jumptohere(code, test)
+end
+
+
+-- Not predicate. Whenever the leading test fails the predicate has already
+-- succeeded, so the test jumps straight to the end. For a headfail pattern
+-- that test is the only place it can fail, so a plain Fail after it is
+-- enough (this includes 'not' of simple sets). Any other pattern is run
+-- under a choice and terminated with FailTwice.
+
+local function codenot(code, tree, index, valuetable)
+    local e, first = getfirst(tree, fullset, index, valuetable)
+    local test = codetestset(code, first, e, valuetable)
+    if not headfail(tree, index) then
+        -- test(fail(p)) -> L1; choice L1; p; failtwice; L1:
+        local choice = addinstruction(code, IChoice, 0)
+        codegen(code, tree, fullset, false, NOINST, index, valuetable)
+        addinstruction(code, IFailTwice, 0)
+        jumptohere(code, choice)
+    else
+        -- test(fail(p)) -> L1; fail; L1:
+        addinstruction(code, IFail, 0)
+    end
+    jumptohere(code, test)
+end
+
+
+-- Turn every IOpenCall in [from, to) into a real call, looking up the
+-- rule position in 'positions' by the rule number stored in 'offset'.
+-- A call directly followed (through any jumps) by IRet becomes a jump
+-- (tail call), but only when the low 16 bits of its 'val' are zero.
+
+local function correctcalls(code, positions, from, to)
+    for pc = from, to - 1 do
+        local inst = code.p[pc]
+        if inst.code == IOpenCall then
+            local target = positions[inst.offset] -- rule number -> position
+            assert(target == from or code.p[target - 1].code == IRet)
+            local plain = bit.band(inst.val, 0xffff) == 0
+            if plain and code.p[finaltarget(code, pc + 1)].code == IRet then
+                inst.code = IJmp -- call; ret  ==> jump
+            else
+                inst.code = ICall
+            end
+            jumptothere(code, pc, target) -- point the call at its rule
+        end
+    end
+end
+
+
+-- Code for a grammar:
+--   call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
+-- The initial call carries 1 in 'val' when the first rule has the RuleLR
+-- flag set (0 otherwise) and the first rule's 'val' in 'aux'.
+
+local function codegrammar(code, tree, index, valuetable)
+    local rule = index + 1
+    assert(tree.p[rule].tag == TRule)
+    local lrflag = (band(RuleLR, tree.p[rule].cap) ~= 0) and 1 or 0
+    local firstcall = addinstruction(code, ICall, lrflag) -- call initial rule
+    code.p[firstcall].aux = tree.p[rule].val
+    local jumptoend = addinstruction(code, IJmp, 0) -- skip over the rules
+    jumptohere(code, firstcall) -- the initial rule starts here
+
+    local positions = {}
+    while tree.p[rule].tag == TRule do
+        positions[#positions + 1] = code.size -- remember where the rule starts
+        codegen(code, tree, fullset, false, NOINST, rule + 1, valuetable)
+        addinstruction(code, IRet, 0)
+        rule = rule + tree.p[rule].ps -- advance to the next rule
+    end
+    assert(tree.p[rule].tag == TTrue)
+
+    jumptohere(code, jumptoend)
+    correctcalls(code, positions, firstcall + 2, code.size)
+end
+
+
+-- Emit an IOpenCall for a TCall node. The instruction gets the node's 'cap'
+-- as value, 'val' as 'aux', and the low 15 bits of the called rule's 'cap'
+-- as offset (the rule number); correctcalls later rewrites it.
+local function codecall(code, tree, index, val)
+    local rule = index + tree.p[index].ps
+    local pc = addinstruction(code, IOpenCall, tree.p[index].cap)
+    code.p[pc].aux = val
+    assert(tree.p[rule].tag == TRule)
+    setoffset(code, pc, band(tree.p[rule].cap, 0x7fff))
+end
+
+
+-- Sequence p1 p2: code p1 (with first(p2) as its follow set when p1 can
+-- use one, 'fullset' otherwise), then p2. The dominating test 'tt' stays
+-- valid for p2 only when p1 has fixed length zero.
+local function codeseq(code, tree, fl, opt, tt, p1, p2, valuetable)
+    local follow1 = fullset
+    if needfollow(tree, p1) then
+        local _, first2 = getfirst(tree, fl, p2, valuetable)
+        follow1 = first2
+    end
+    codegen(code, tree, follow1, false, tt, p1, valuetable)
+    if fixedlenx(tree, 0, 0, p1) ~= 0 then
+        tt = NOINST -- p1 may consume input: the test no longer applies
+    end
+    return codegen(code, tree, fl, opt, tt, p2, valuetable)
+end
+
+
+-- Main code-generation function: dispatch to auxiliary functions
+-- according to the kind (tag) of the tree node at 'index'.
+
+-- Code generation is recursive; 'opt' indicates that the code is being
+-- generated as the last thing inside an optional pattern (so, if that
+-- code is optional too, it can reuse the 'IChoice' already in place for
+-- the outer pattern). 'tt' points to a previous test protecting this
+-- code (or NOINST). 'fl' is the follow set of the pattern.
+-- 'code' is the instruction buffer being filled, 'tree' the pattern tree
+-- and 'valuetable' the table of Lua values referenced by the nodes.
+-- An unknown tag trips the final assert.
+-- NOTE(review): this is defined without 'local'; presumably 'codegen' is
+-- forward-declared as a local earlier in the file (the functions above
+-- call it) -- confirm, otherwise this creates a global.
+
+function codegen(code, tree, fl, opt, tt, index, valuetable)
+ local tag = tree.p[index].tag
+ if tag == TChar then
+ return codechar(code, tree.p[index].val, tt)
+ elseif tag == TAny then
+ return addinstruction(code, IAny, 0)
+ elseif tag == TSet then
+ return codecharset(code, valuetable[tree.p[index].val], tt, valuetable)
+ elseif tag == TTrue then
+ -- always matches: nothing to emit
+ elseif tag == TFalse then
+ return addinstruction(code, IFail, 0)
+ elseif tag == TSeq then
+ -- siblings: first at index + 1, second at index + ps
+ return codeseq(code, tree, fl, opt, tt, index + 1, index + tree.p[index].ps, valuetable)
+ elseif tag == TChoice then
+ return codechoice(code, tree, fl, opt, index + 1, index + tree.p[index].ps, valuetable)
+ elseif tag == TRep then
+ return coderep(code, tree, opt, fl, index + 1, valuetable)
+ elseif tag == TBehind then
+ return codebehind(code, tree, index, valuetable)
+ elseif tag == TNot then
+ return codenot(code, tree, index + 1, valuetable)
+ elseif tag == TAnd then
+ return codeand(code, tree, tt, index + 1, valuetable)
+ elseif tag == TCapture then
+ return codecapture(code, tree, fl, tt, index, valuetable)
+ elseif tag == TRunTime then
+ return coderuntime(code, tree, tt, index, valuetable)
+ elseif tag == TGrammar then
+ return codegrammar(code, tree, index, valuetable)
+ elseif tag == TCall then
+ return codecall(code, tree, index, tree.p[index].val)
+ else
+ assert(false)
+ end
+end
+
+
+-- Peephole pass over the generated code.
+-- * An instruction that carries a label gets the label replaced by its
+--   final destination (e.g., choice L1; ... L1: jmp L2  becomes  choice L2).
+-- * A jump whose final target is itself a jump-like instruction is
+--   replaced by a copy of that instruction (jump to return becomes a
+--   return; jump to commit becomes a commit).
+
+local function peephole(code)
+    local pc = 0
+    while pc < code.size do
+        local op = code.p[pc].code
+        if op == IJmp then
+            local ft = finaltarget(code, pc)
+            local dest = code.p[ft].code -- what does the jump land on?
+            if dest == IRet or dest == IFail or dest == IFailTwice or dest == IEnd then
+                -- target jumps implicitly: the jump becomes that instruction
+                ffi.copy(code.p + pc, code.p + ft, ffi.sizeof(patternelement))
+            elseif dest == ICommit or dest == IPartialCommit or dest == IBackCommit then
+                -- target jumps explicitly: copy it, then fix the copied offset
+                local fft = finallabel(code, ft)
+                ffi.copy(code.p + pc, code.p + ft, ffi.sizeof(patternelement))
+                jumptothere(code, pc, fft)
+                pc = pc - 1 -- visit this slot again to reoptimize its label
+            else
+                jumptothere(code, pc, ft)
+            end
+        elseif op == IChoice or op == ICall or op == ICommit or op == IPartialCommit or
+            op == IBackCommit or op == ITestChar or op == ITestSet or op == ITestAny then
+            -- instruction with a label: point it at the final destination
+            jumptothere(code, pc, finallabel(code, pc))
+        end
+        pc = pc + 1
+    end
+end
+
+
+-- Compile the pattern rooted at 'index': generate its code into a new
+-- buffer, terminate it with IEnd, run the peephole pass, then free the
+-- buffer previously held in 'tree.code' and install the new one.
+
+local function compile(tree, index, valuetable)
+    local program = pattern()
+    codegen(program, tree, fullset, false, NOINST, index, valuetable)
+    addinstruction(program, IEnd, 0)
+    peephole(program)
+    ffi.C.free(tree.code)
+    tree.code = program
+end
+
+-- Constructor for PATTERN objects: allocates the header and a zero-filled
+-- instruction buffer with room for max(size, 10) elements.
+-- 'ct' (the ctype) is not used; 'size' defaults to 0.
+local function pat_new(ct, size)
+    size = size or 0
+    local capacity = size < 10 and 10 or size
+    local nbytes = ffi.sizeof(patternelement) * capacity
+
+    local pat = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
+    assert(pat ~= nil)
+    pat.allocsize = capacity
+    pat.size = size
+
+    pat.p = ffi.C.malloc(nbytes)
+    assert(pat.p ~= nil)
+    ffi.fill(pat.p, nbytes)
+    return pat
+end
+
+-- Double the capacity of the instruction buffer of pattern 'ct' and
+-- zero-fill the newly added half.
+-- The result of realloc is checked BEFORE it is stored: when realloc fails
+-- it returns NULL and leaves the old block allocated, so 'ct.p' must keep
+-- pointing at that block instead of being overwritten (which would lose
+-- the only reference to it and leave the pattern with a NULL buffer).
+local function doublesize(ct)
+    local oldcount = ct.allocsize
+    local elembytes = ffi.sizeof(patternelement)
+    local grown = ffi.C.realloc(ct.p, elembytes * oldcount * 2)
+    assert(grown ~= nil, "not enough memory")
+    ct.p = grown
+    ffi.fill(ct.p + oldcount, elembytes * oldcount)
+    ct.allocsize = oldcount * 2
+end
+
+-- Attach the constructor and the 'doublesize' method to the PATTERN ctype.
+ffi.metatype(pattern, {
+    __new = pat_new,
+    __index = {
+        doublesize = doublesize,
+    },
+})
+
+-- functions exported by this module
+return {
+    checkaux = checkaux,
+    tocharset = tocharset,
+    fixedlenx = fixedlenx,
+    hascaptures = hascaptures,
+    compile = compile,
+}
\ No newline at end of file
diff --git a/tools/lpeg/lpeg.lua b/tools/lpeg/lpeg.lua
new file mode 100644
index 0000000..de4fca3
--- /dev/null
+++ b/tools/lpeg/lpeg.lua
@@ -0,0 +1,1373 @@
+--[[
+LPEGLJ
+lpeglj.lua
+Main module and tree generation
+Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua
+Lua.org & PUC-Rio written by Roberto Ierusalimschy
+http://www.inf.puc-rio.br/~roberto/lpeg/
+
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+--]]
+
+-- Refuse to load on anything but LuaJIT 2.0.1+. 'jit' is tested first so
+-- that a plain Lua interpreter (where the global does not exist) reports
+-- this message instead of an "attempt to index" error.
+assert(jit and jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.")
+
+local ffi = require "ffi"
+local lpcode = require "lpcode"
+local lpprint = require "lpprint"
+local lpvm = require "lpvm"
+
+local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift
+
+-- C character-class predicates (ctype.h), declared for use through ffi.C
+ffi.cdef [[
+ int isalnum(int c);
+ int isalpha(int c);
+ int iscntrl(int c);
+ int isdigit(int c);
+ int isgraph(int c);
+ int islower(int c);
+ int isprint(int c);
+ int ispunct(int c);
+ int isspace(int c);
+ int isupper(int c);
+ int isxdigit(int c);
+]]
+
+-- limits and version string
+local MAXBEHIND = 255
+local MAXRULES = 200
+local VERSION = "1.0.0.0LJ"
+
+-- tree node tags (the 'tag' field of a tree element)
+local TChar = 0
+local TSet = 1
+local TAny = 2 -- standard PEG elements
+local TTrue = 3
+local TFalse = 4
+local TRep = 5
+local TSeq = 6
+local TChoice = 7
+local TNot = 8
+local TAnd = 9
+local TCall = 10
+local TOpenCall = 11
+local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
+local TGrammar = 13 -- sib1 is initial (and first) rule
+local TBehind = 14 -- match behind
+local TCapture = 15 -- regular capture
+local TRunTime = 16 -- run-time capture
+
+-- virtual machine instruction opcodes (the 'code' field of an instruction)
+local IAny = 0 -- if no char, fail
+local IChar = 1 -- if char != val, fail
+local ISet = 2 -- if char not in val, fail
+local ITestAny = 3 -- if no char, jump to 'offset'
+local ITestChar = 4 -- if char != val, jump to 'offset'
+local ITestSet = 5 -- if char not in val, jump to 'offset'
+local ISpan = 6 -- read a span of chars in val
+local IBehind = 7 -- walk back 'val' characters (fail if not possible)
+local IRet = 8 -- return from a rule
+local IEnd = 9 -- end of pattern
+local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
+local IJmp = 11 -- jump to 'offset'
+local ICall = 12 -- call rule at 'offset'
+local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
+local ICommit = 14 -- pop choice and jump to 'offset'
+local IPartialCommit = 15 -- update top choice to current position and jump
+local IBackCommit = 16 -- "fails" but jump to its own 'offset'
+local IFailTwice = 17 -- pop one choice and then fail
+local IFail = 18 -- go back to saved state on choice and jump to saved offset
+local IGiveup = 19 -- internal use
+local IFullCapture = 20 -- complete capture of last 'off' chars
+local IOpenCapture = 21 -- start a capture
+local ICloseCapture = 22
+local ICloseRunTime = 23
+
+-- capture kinds
+local Cclose = 0
+local Cposition = 1
+local Cconst = 2
+local Cbackref = 3
+local Carg = 4
+local Csimple = 5
+local Ctable = 6
+local Cfunction = 7
+local Cquery = 8
+local Cstring = 9
+local Cnum = 10
+local Csubst = 11
+local Cfold = 12
+local Cruntime = 13
+local Cgroup = 14
+
+-- predicate selectors (lpcode.checkaux compares its 'pred' argument
+-- against PEnullable and PEnofail)
+local PEnullable = 0
+local PEnofail = 1
+local PEleftrecursion = 2
+
+-- forward declaration; getpatt calls it for table arguments
+local newgrammar
+
+-- flag bits kept in the 'cap' field of rule nodes (fixonecall sets
+-- Ruleused there)
+-- NOTE(review): BCapcandelete equals RuleLR + Ruleused; its intended
+-- meaning is not visible here -- confirm
+local RuleLR = 0x10000
+local Ruleused = 0x20000
+local BCapcandelete = 0x30000
+
+local LREnable = false
+
+-- number of siblings for each tree
+-- (indexed by tag + 1, since the tags start at 0)
+local numsiblings = {
+ 0, 0, 0, -- char, set, any
+ 0, 0, -- true, false
+ 1, -- rep
+ 2, 2, -- seq, choice
+ 1, 1, -- not, and
+ 0, 0, 2, 1, -- call, opencall, rule, grammar
+ 1, -- behind
+ 1, 1 -- capture, runtime capture
+}
+
+
+
+local patternid = 0
+-- Lua values referenced by trees, keyed by the tree's 'id'
+local valuetable = {}
+
+-- names registered for run-time capture functions (weak keys)
+local funcnames = setmetatable({}, { __mode = 'k' })
+
+-- cached FFI ctypes
+local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
+local treepattern = ffi.typeof('TREEPATTERN')
+local patternelement = ffi.typeof('PATTERN_ELEMENT')
+local pattern = ffi.typeof('PATTERN')
+local settype = ffi.typeof('int32_t[8]')
+local uint32 = ffi.typeof('uint32_t[1]')
+
+-- Turn the TOpenCall node at 'index' into a TCall. The rule's name is
+-- read from 'valuetable' and looked up in 'postable' (name -> position of
+-- the rule in the tree); an unknown name raises an error. The call's 'ps'
+-- becomes the rule's position relative to the call, and the rule is
+-- flagged as used.
+
+local function fixonecall(postable, grammar, index, valuetable)
+    local call = grammar.p[index]
+    local name = valuetable[call.val]
+    local pos = postable[name]
+    if not pos then
+        local shown = name
+        if type(name) == 'table' then
+            shown = '(a table)'
+        end
+        error(("rule '%s' undefined in given grammar"):format(shown), 0)
+    end
+    call.tag = TCall
+    call.ps = pos - index
+    local rule = grammar.p[index + call.ps]
+    rule.cap = bit.bor(rule.cap, Ruleused)
+end
+
+
+-- Transform left associative constructions into right
+-- associative ones, for sequence and choice; that is:
+-- (t11 + t12) + t2 => t11 + (t12 + t2)
+-- (t11 * t12) * t2 => t11 * (t12 * t2)
+-- (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
+-- The tree is rewritten in place; the loop repeats while the first
+-- sibling of the node at 'index' still has the same tag as the node.
+
+local function correctassociativity(tree, index)
+ local t1 = index + 1
+ assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq)
+ while tree.p[t1].tag == tree.p[index].tag do
+ local n1size = tree.p[index].ps - 1; -- t1 == Op t11 t12
+ local n11size = tree.p[t1].ps - 1;
+ local n12size = n1size - n11size - 1
+ -- move t11 one slot to the left, over the inner Op node; copied one
+ -- element at a time because source and destination overlap
+ for i = 1, n11size do
+ ffi.copy(tree.p + index + i, tree.p + t1 + i, ffi.sizeof(treepatternelement))
+ end
+ tree.p[index].ps = n11size + 1
+ -- the slot right after the moved t11 becomes the new inner Op node,
+ -- whose second sibling starts after t12
+ tree.p[index + tree.p[index].ps].tag = tree.p[index].tag
+ tree.p[index + tree.p[index].ps].ps = n12size + 1
+ end
+end
+
+
+-- Final adjustments on the tree rooted at 'index'. Open calls are bound
+-- to their rules when 'fix' is true (i.e. inside a grammar) and raise an
+-- error otherwise; sequences and choices are made right associative.
+-- Subgrammars are left alone, and the siblings of every other node are
+-- processed recursively.
+
+local function finalfix(fix, postable, grammar, index, valuetable)
+    local tag = grammar.p[index].tag
+    if tag == TGrammar then
+        return -- subgrammars were already fixed
+    end
+
+    if tag == TOpenCall then
+        if not fix then
+            -- open call outside a grammar
+            error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0)
+        end
+        fixonecall(postable, grammar, index, valuetable)
+    elseif tag == TSeq or tag == TChoice then
+        correctassociativity(grammar, index)
+    end
+
+    local ns = numsiblings[tag + 1]
+    if ns == 1 then
+        return finalfix(fix, postable, grammar, index + 1, valuetable)
+    elseif ns == 2 then
+        finalfix(fix, postable, grammar, index + 1, valuetable)
+        return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable)
+    elseif ns ~= 0 then
+        assert(false)
+    end
+end
+
+
+-- {======================================================
+-- Tree generation
+-- =======================================================
+
+-- Create a one-node TSet tree whose value table holds a new charset as
+-- entry 1; returns the tree and that charset.
+local function newcharset()
+    local tree = treepattern(1)
+    local cs = settype()
+    valuetable[tree.id] = { cs }
+    local node = tree.p[0]
+    node.tag = TSet
+    node.val = 1
+    return tree, cs
+end
+
+
+-- Write a TSeq node at position 'start' of 'tree' and copy the 'sibsize'
+-- nodes of tree 'sib' right after it as its first sibling. Nothing is
+-- written for the second sibling.
+
+local function seqaux(tree, sib, start, sibsize)
+    local node = tree.p[start]
+    node.tag = TSeq
+    node.ps = sibsize + 1
+    ffi.copy(tree.p + (start + 1), sib.p, sibsize * ffi.sizeof(treepatternelement))
+end
+
+
+-- Fill 'tree', from position 'start', with a sequence of 'n' nodes of tag
+-- 'tag'. When the string 's' is given, the i-th node gets the i-th byte of
+-- 's' as its 'val'; otherwise 'val' is not written. TSeq is binary, so
+-- the result is a chain  Seq tag (Seq tag (... tag)).
+
+local function fillseq(tree, tag, start, n, s)
+    local pos = start
+    -- the first n-1 nodes are each wrapped in a TSeq
+    for i = 1, n - 1 do
+        local seq, leaf = tree.p[pos], tree.p[pos + 1]
+        seq.tag = TSeq
+        seq.ps = 2
+        leaf.tag = tag
+        if s then
+            leaf.val = s:byte(i)
+        end
+        pos = pos + seq.ps
+    end
+    -- the last node stands alone
+    local last = tree.p[pos]
+    last.tag = tag
+    if s then
+        last.val = s:byte(n)
+    end
+end
+
+
+-- Numbers as patterns:
+--   0  ==> TTrue (always matches)
+--   n  ==> TAny repeated 'n' times
+--  -n  ==> TNot over TAny repeated 'n' times
+
+local function numtree(n)
+    if n == 0 then
+        local tree = treepattern(1)
+        tree.p[0].tag = TTrue
+        return tree
+    end
+    if n > 0 then
+        local tree = treepattern(2 * n - 1)
+        fillseq(tree, TAny, 0, n) -- sequence of 'n' any's
+        return tree
+    end
+    -- negative: one extra node for the leading TNot
+    local count = -n
+    local tree = treepattern(2 * count)
+    tree.p[0].tag = TNot
+    fillseq(tree, TAny, 1, count)
+    return tree
+end
+
+
+-- Convert a value to a pattern tree:
+--   string   -> sequence of TChar nodes (TTrue for the empty string)
+--   number   -> see numtree
+--   boolean  -> TTrue / TFalse
+--   table    -> grammar (via newgrammar)
+--   function -> TRunTime node over TTrue, with the function stored as
+--               value 1 of the new tree; when 'name' is a string it is
+--               recorded in 'funcnames' for that function
+--   tree     -> returned as is (it must not be empty)
+-- Any other value raises an error naming the offending type (instead of
+-- a bare "assertion failed!").
+
+local function getpatt(val, name)
+    local typ = type(val)
+    if typ == 'string' then
+        if #val == 0 then
+            local pat = treepattern(1)
+            pat.p[0].tag = TTrue -- empty string: always match
+            return pat
+        end
+        local tree = treepattern(2 * (#val - 1) + 1)
+        fillseq(tree, TChar, 0, #val, val) -- sequence of '#val' chars
+        return tree
+    elseif typ == 'number' then
+        return numtree(val)
+    elseif typ == 'boolean' then
+        local pat = treepattern(1)
+        pat.p[0].tag = val and TTrue or TFalse
+        return pat
+    elseif typ == 'table' then
+        return newgrammar(val)
+    elseif typ == 'function' then
+        if name and type(name) == 'string' then
+            funcnames[val] = name
+        end
+        local pat = treepattern(2)
+        valuetable[pat.id] = { val }
+        pat.p[0].tag = TRunTime
+        pat.p[0].val = 1
+        pat.p[1].tag = TTrue
+        return pat
+    elseif ffi.istype(treepattern, val) then
+        assert(val.treesize > 0)
+        return val
+    end
+    -- level 0, like the other errors raised by this module
+    error(("pattern expected, got %s"):format(typ), 0)
+end
+
+-- Concatenate up to two value tables into a fresh table. Returns that table and
+-- the index shift applied to 'ktable2' entries (0 unless both tables are given).
+local function copykeys(ktable1, ktable2)
+    local merged = {}
+    local function append(src)
+        for i = 1, #src do
+            merged[#merged + 1] = src[i]
+        end
+    end
+    if ktable1 then append(ktable1) end
+    if not (ktable1 and ktable2) then
+        if ktable2 then
+            append(ktable2)
+            assert(#merged < 65536, "too many Lua values in pattern")
+        end
+        return merged, 0
+    end
+    append(ktable2)
+    assert(#merged < 65536, "too many Lua values in pattern")
+    return merged, #ktable1
+end
+
+-- Add 'offset' to each non-zero value-table index in the subtree at 'index'.
+local function correctkeys(tree, index, offset)
+    local node = tree.p[index]
+    local tag = node.tag
+    local keyed = tag == TSet or tag == TRule or tag == TCall or tag == TRunTime or tag == TOpenCall or tag == TCapture
+    if keyed and node.val ~= 0 then
+        node.val = node.val + offset
+    end
+    local ns = numsiblings[tag + 1]
+    if ns == 2 then
+        correctkeys(tree, index + 1, offset)
+        return correctkeys(tree, index + node.ps, offset)
+    elseif ns == 1 then
+        return correctkeys(tree, index + 1, offset)
+    end
+    assert(ns == 0)
+end
+
+
+
+-- Create a new tree: a root tagged 'tag' followed by a copy of pattern 'pat'.
+local function newroot1sib(tag, pat)
+    local child = getpatt(pat)
+    local size = child.treesize
+    local tree = treepattern(size + 1)
+    valuetable[tree.id] = copykeys(valuetable[child.id]) -- private copy of the child's value table
+    tree.p[0].tag = tag
+    ffi.copy(tree.p + 1, child.p, size * ffi.sizeof(treepatternelement))
+    return tree
+end
+
+
+-- Create a new tree with root 'tag' and two children copied from 'pat1' and
+-- 'pat2'. The value tables are concatenated, so the value indices of the
+-- second child are shifted by the length of the first table.
+local function newroot2sib(tag, pat1, pat2)
+    local left, right = getpatt(pat1), getpatt(pat2)
+    local lsize, rsize = left.treesize, right.treesize
+    local elemsize = ffi.sizeof(treepatternelement)
+    local tree = treepattern(1 + lsize + rsize)
+    local ktable, offset = copykeys(valuetable[left.id], valuetable[right.id])
+    valuetable[tree.id] = ktable
+    tree.p[0].tag = tag
+    tree.p[0].ps = 1 + lsize -- the second child starts right after the first
+    ffi.copy(tree.p + 1, left.p, elemsize * lsize)
+    ffi.copy(tree.p + 1 + lsize, right.p, elemsize * rsize)
+    if offset > 0 then correctkeys(tree, 1 + lsize, offset) end
+    return tree
+end
+
+-- Exported as 'P': convert any non-nil value to a pattern through getpatt.
+local function lp_P(val, name)
+ assert(type(val) ~= 'nil')
+ return getpatt(val, name)
+end
+
+
+-- Sequence operator (the __mul metamethod); optimizations:
+--   false x => false,  x true => x,  true x => x
+-- (x false cannot become false, because x may have runtime captures)
+
+local function lp_seq(pat1, pat2)
+    local tree1, tree2 = getpatt(pat1), getpatt(pat2)
+    local tag1, tag2 = tree1.p[0].tag, tree2.p[0].tag
+    if tag1 == TFalse or tag2 == TTrue then
+        -- false . x == false, x . true == x
+        return tree1
+    end
+    if tag1 == TTrue then
+        -- true . x == x
+        return tree2
+    end
+    return newroot2sib(TSeq, tree1, tree2)
+end
+
+
+-- Choice operator (the __add metamethod); optimizations:
+--   charset / charset => charset (bitwise union)
+--   p / x => p when p passes the PEnofail check,  x / false => x,  false / x => x
+-- (x / true is not equivalent to true)
+local function lp_choice(pat1, pat2)
+    local tree1, tree2 = getpatt(pat1), getpatt(pat2)
+    local cs1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
+    local cs2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
+    if cs1 and cs2 then
+        -- both sides are plain character sets: merge their eight bitmap words
+        local union, bits = newcharset()
+        for word = 0, 7 do
+            bits[word] = bor(cs1[word], cs2[word])
+        end
+        return union
+    end
+    if lpcode.checkaux(tree1, PEnofail, 0) or tree2.p[0].tag == TFalse then
+        return tree1
+    end
+    if tree1.p[0].tag == TFalse then
+        return tree2
+    end
+    return newroot2sib(TChoice, tree1, tree2)
+end
+
+
+-- p^n (the __pow metamethod). For n >= 0 the tree is 'n' copies of tree1 in
+-- sequence followed by TRep(tree1); for n < 0 it is -n nested TChoice nodes.
+local function lp_star(tree1, n)
+ local tree
+ n = tonumber(n)
+ assert(type(n) == 'number') -- fails when tonumber returned nil, i.e. 'n' was not numeric
+ -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
+ if n >= 0 then
+ tree = treepattern((n + 1) * (tree1.treesize + 1)) -- n * (TSeq + tree1) + (TRep + tree1)
+ if lpcode.checkaux(tree1, PEnullable, 0) then
+ error("loop body may accept empty string", 0)
+ end
+ valuetable[tree.id] = copykeys(valuetable[tree1.id])
+ local start = 0
+ -- repeat 'n' times
+ for i = 1, n do
+ seqaux(tree, tree1, start, tree1.treesize)
+ start = start + tree.p[start].ps -- jump to the second child of the TSeq just written
+ end
+ tree.p[start].tag = TRep
+ ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ -- choice (seq tree1 ... choice tree1 true ...) true
+ else
+ n = -n;
+ -- size = (choice + seq + tree1 + true) * n, but the last has no seq
+ tree = treepattern(n * (tree1.treesize + 3) - 1)
+ valuetable[tree.id] = copykeys(valuetable[tree1.id])
+ local start = 0
+ -- repeat (n - 1) times
+ for i = n, 2, -1 do
+ tree.p[start].tag = TChoice;
+ tree.p[start].ps = i * (tree1.treesize + 3) - 2 -- second child: the TTrue written on the next line
+ tree.p[start + tree.p[start].ps].tag = TTrue;
+ start = start + 1
+ seqaux(tree, tree1, start, tree1.treesize)
+ start = start + tree.p[start].ps
+ end
+ tree.p[start].tag = TChoice;
+ tree.p[start].ps = tree1.treesize + 1
+ tree.p[start + tree.p[start].ps].tag = TTrue
+ ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ end
+ return tree
+end
+
+
+-- #p == &p: and-predicate, built as a TAnd root over a copy of 'pat'.
+-- Exported as 'L' and installed as the __len metamethod.
+local function lp_and(pat)
+ return newroot1sib(TAnd, pat)
+end
+
+
+-- -p == !p: not-predicate, built as a TNot root over a copy of 'pat'.
+-- Installed as the __unm metamethod.
+local function lp_not(pat)
+ return newroot1sib(TNot, pat)
+end
+
+
+-- Difference (the __sub metamethod). Two character sets are subtracted
+-- bitwise; anything else is built as Seq(Not(t2), t1).
+local function lp_sub(pat1, pat2)
+    local tree1, tree2 = getpatt(pat1), getpatt(pat2)
+    local cs1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
+    local cs2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
+    if cs1 and cs2 then
+        local diff, bits = newcharset()
+        for word = 0, 7 do
+            bits[word] = band(cs1[word], bnot(cs2[word]))
+        end
+        return diff
+    end
+    local size1, size2 = tree1.treesize, tree2.treesize
+    local elemsize = ffi.sizeof(treepatternelement)
+    local tree = treepattern(2 + size1 + size2)
+    -- tree2 is copied first into the new tree, so its value table goes first too
+    local ktable, offset = copykeys(valuetable[tree2.id], valuetable[tree1.id])
+    valuetable[tree.id] = ktable
+    tree.p[0].tag = TSeq
+    tree.p[0].ps = 2 + size2 -- skips the TSeq, the TNot and the copy of tree2
+    tree.p[1].tag = TNot
+    ffi.copy(tree.p + 2, tree2.p, elemsize * size2)
+    ffi.copy(tree.p + 2 + size2, tree1.p, elemsize * size1)
+    if offset > 0 then
+        correctkeys(tree, 2 + size2, offset)
+    end
+    return tree
+end
+
+-- Exported as 'S': character set whose members are the bytes of the string 'val'.
+local function lp_set(val)
+    assert(type(val) == 'string')
+    local tree, bits = newcharset()
+    for pos = 1, #val do
+        local code = val:byte(pos)
+        bits[rshift(code, 5)] = bor(bits[rshift(code, 5)], lshift(1, band(code, 31)))
+    end
+    return tree
+end
+
+-- Exported as 'R': character set holding every byte of each given range, where
+-- a two-character string "xy" stands for the bytes x..y inclusive.
+local function lp_range(...)
+    local tree, set = newcharset()
+    for i = 1, select('#', ...) do -- select('#') also counts nil arguments, unlike '#' on a packed table
+        local range = select(i, ...)
+        assert(type(range) == 'string' and #range == 2, tostring(range) .. " range must have two characters")
+        for b = range:byte(1), range:byte(2) do
+            set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
+        end
+    end
+    return tree
+end
+
+
+-- Look-behind predicate: 'pat' must have no captures and a fixed length <= MAXBEHIND.
+local function lp_behind(pat)
+    local tree1 = getpatt(pat)
+    local n = lpcode.fixedlenx(tree1, 0, 0, 0)
+    assert(not lpcode.hascaptures(tree1, 0), "pattern have captures")
+    assert(n >= 0, "pattern may not have fixed length")
+    assert(n <= MAXBEHIND, "pattern too long to look behind")
+    -- hand over the converted tree, so 'pat' is not converted a second time
+    local tree = newroot1sib(TBehind, tree1)
+    tree.p[0].val = n
+    return tree
+end
+
+
+-- Create a non-terminal (exported as 'V'): a single TOpenCall node whose
+-- value-table entry 1 is the rule key 'val'; 'p' (default 0) is stored in 'cap'.
+local function lp_V(val, p)
+    assert(val, "non-nil value expected")
+    local tree = treepattern(1)
+    local root = tree.p[0]
+    valuetable[tree.id] = { val }
+    root.tag, root.val = TOpenCall, 1
+    root.cap = p or 0
+    return tree
+end
+
+
+-- Create a tree for a non-empty capture of kind 'cap' around 'pat'. A truthy
+-- 'val' is appended to the tree's value table and referenced from the root.
+local function capture_aux(cap, pat, val)
+    local tree = newroot1sib(TCapture, pat)
+    tree.p[0].cap = cap
+    if val then
+        local values = valuetable[tree.id]
+        local ind = #values + 1
+        assert(ind <= 65536, "too many Lua values in pattern") -- constant message: nothing is concatenated per call
+        values[ind] = val
+        tree.p[0].val = ind
+    end
+    return tree
+end
+
+
+-- Write an empty capture at 'start': TCapture of kind 'cap' over a TTrue child.
+local function auxemptycap(tree, cap, par, start)
+    local node = tree.p[start]
+    node.tag, node.cap = TCapture, cap
+    if type(par) ~= 'nil' then
+        local values = valuetable[tree.id]
+        local ind = #values + 1
+        assert(ind <= 65536, "too many Lua values in pattern")
+        values[ind] = par
+        node.val = ind
+    end
+    tree.p[start + 1].tag = TTrue
+end
+
+-- Create a two-node tree for an empty capture: TCapture of kind 'cap' over TTrue.
+local function newemptycap(cap, par)
+    local tree = treepattern(2)
+    if type(par) ~= 'nil' then
+        valuetable[tree.id] = {} -- auxemptycap appends 'par' to this table
+    end
+    auxemptycap(tree, cap, par, 0)
+    return tree
+end
+
+
+-- Captures with syntax p / v (the __div metamethod). The type of 'par' picks
+-- the capture: function, table (query), string or number capture.
+-- 'xxx' is never read.
+local function lp_divcapture(pat, par, xxx)
+    local typ = type(par)
+    if typ == "number" then
+        local tree = newroot1sib(TCapture, pat)
+        assert(0 <= par and par <= 0xffff, "invalid number")
+        tree.p[0].cap = Cnum
+        local values = valuetable[tree.id]
+        local ind = #values + 1
+        assert(ind <= 65536, "too many Lua values in pattern")
+        values[ind] = par
+        tree.p[0].val = ind
+        return tree
+    end
+    -- the remaining kinds all store 'par' through capture_aux
+    local kind
+    if typ == "function" then kind = Cfunction
+    elseif typ == "table" then kind = Cquery
+    elseif typ == "string" then kind = Cstring
+    else error("invalid replacement value", 0) end
+    return capture_aux(kind, pat, par)
+end
+
+-- Exported as 'Cs': Csubst capture around 'pat', with no associated value.
+local function lp_substcapture(pat)
+ return capture_aux(Csubst, pat)
+end
+
+-- Exported as 'Ct': Ctable capture around 'pat'; the 0 is truthy in Lua, so capture_aux stores it as the value.
+local function lp_tablecapture(pat)
+ return capture_aux(Ctable, pat, 0)
+end
+
+-- Group capture (exported as 'Cg'); 'val', when truthy, becomes its associated value.
+local function lp_groupcapture(pat, val)
+    if val then
+        return capture_aux(Cgroup, pat, val)
+    end
+    -- nil or false: plain group capture without a value
+    return capture_aux(Cgroup, pat)
+end
+
+-- Exported as 'Cf': Cfold capture around 'pat' with the function 'fce' as its associated value.
+local function lp_foldcapture(pat, fce)
+ assert(type(fce) == 'function')
+ return capture_aux(Cfold, pat, fce)
+end
+
+-- Exported as 'C': Csimple capture around 'pat', with no associated value.
+local function lp_simplecapture(pat)
+ return capture_aux(Csimple, pat)
+end
+
+-- Exported as 'Cp': empty Cposition capture, with no associated value.
+local function lp_poscapture()
+ return newemptycap(Cposition)
+end
+
+-- Argument capture (exported as 'Carg'): empty Carg capture storing the index 'val'.
+local function lp_argcapture(val)
+    assert(type(val) == 'number')
+    assert(0 < val and val <= 0xffff, "invalid argument index") -- checked before anything is allocated
+    local tree = newemptycap(Carg, 0) -- the 0 placeholder makes newemptycap attach a value table
+    local ind = #valuetable[tree.id] + 1
+    assert(ind <= 65536, "too many Lua values in pattern")
+    valuetable[tree.id][ind] = val
+    tree.p[0].val = ind
+    return tree
+end
+
+-- Exported as 'Cb': empty Cbackref capture with 'val' as its associated value.
+local function lp_backref(val)
+ return newemptycap(Cbackref, val)
+end
+
+
+-- Constant capture (exported as 'Cc'). No values: a plain TTrue. One value:
+-- a single empty Cconst capture. Several values: a Cgroup capture around a
+-- right-nested sequence of empty Cconst captures, one per value.
+local function lp_constcapture(...)
+    local n = select('#', ...) -- counts nil arguments as well
+    local args = { ... }
+    if n == 0 then
+        local tree = treepattern(1)
+        tree.p[0].tag = TTrue
+        return tree
+    end
+    if n == 1 then
+        return newemptycap(Cconst, args[1])
+    end
+    -- group root + (n - 1) * (TSeq, TCapture, TTrue) + final (TCapture, TTrue)
+    local tree = treepattern(3 * n)
+    valuetable[tree.id] = {}
+    tree.p[0].tag = TCapture
+    tree.p[0].cap = Cgroup
+    local pos = 1
+    for i = 1, n - 1 do
+        tree.p[pos].tag = TSeq
+        tree.p[pos].ps = 3
+        auxemptycap(tree, Cconst, args[i], pos + 1)
+        pos = pos + 3
+    end
+    auxemptycap(tree, Cconst, args[n], pos)
+    return tree
+end
+
+
+local function lp_matchtime(pat, fce, name)
+    -- Exported as 'Cmt': a TRunTime root over 'pat' whose value-table entry is the function 'fce'.
+    assert(type(fce) == 'function')
+    if type(name) == 'string' then funcnames[fce] = name end
+    local tree = newroot1sib(TRunTime, pat)
+    local values = valuetable[tree.id]
+    local ind = #values + 1
+    assert(ind <= 65536, "too many Lua values in pattern")
+    values[ind] = fce
+    tree.p[0].val = ind
+    return tree
+end
+
+-- ======================================================
+
+
+
+-- ======================================================
+-- Grammar - Tree generation
+-- =======================================================
+
+
+-- return index and the pattern for the
+-- initial rule of grammar;
+-- also add that index into position table.
+
+local function getfirstrule(pat, postab)
+    -- a string in slot 1 names the initial rule; anything else makes slot 1
+    -- itself the initial rule
+    local first = pat[1]
+    local key = 1
+    if type(first) == 'string' then
+        key = first
+    end
+    local rule = pat[key]
+    if not rule then
+        error("grammar has no initial rule", 0)
+    end
+    -- initial rule not a pattern?
+    if not ffi.istype(treepattern, rule) then
+        error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0)
+    end
+    postab[key] = 1 -- position 1: newgrammar puts TGrammar at index 0 and the rules right after it
+    return key, rule
+end
+
+
+-- Traverse grammar 'pat' and collect its rules into a flat array
+-- { key1, patt1, key2, patt2, ... } with the initial rule first. A "position
+-- table" is built alongside: it maps every rule key to the index its TRule
+-- node will have in the final tree.
+-- Returns the number of rules, the total size for the new tree, the rule
+-- array and the position table.
+local function collectrules(pat)
+    local postab = {}
+    local firstkey, firstrule = getfirstrule(pat, postab)
+    local rules = { firstkey, firstrule }
+    local n = 1 -- number of rules collected so far
+    local size = 2 + firstrule.treesize -- TGrammar + TRule + rule
+    for key, val in pairs(pat) do
+        -- Skip slot 1 (the initial rule or its name) and the initial rule's own
+        -- key. The test is on the key, not on the pattern, so a second rule
+        -- sharing the initial rule's pattern object is still collected.
+        if key ~= 1 and key ~= firstkey then
+            if not ffi.istype(treepattern, val) then
+                error(("rule '%s' is not a pattern"):format(tostring(key)), 0)
+            end
+            rules[#rules + 1] = key
+            rules[#rules + 1] = val
+            postab[key] = size
+            size = size + 1 + val.treesize -- TRule + rule
+            n = n + 1
+        end
+    end
+    size = size + 1 -- TTrue to finish list of rules
+    return n, size, rules, postab
+end
+
+-- Copy the 'n' rules of 'rules' (key/pattern pairs) into 'grammar' from 'index', each behind a TRule
+-- node, and close the list with TTrue. Returns the merged value table ('valuetable' shadows the outer one).
+local function buildgrammar(grammar, rules, n, index, valuetable)
+    local ktable, offset = {}, 0
+    local elemsize = ffi.sizeof(treepatternelement)
+    for i = 1, n do
+        local key, body = rules[2 * i - 1], rules[2 * i]
+        local size = body.treesize
+        local node = grammar.p[index]
+        local ind = #ktable + 1
+        ktable[ind] = key -- the rule's key goes in first, the rule's own values follow
+        node.tag, node.cap, node.ps, node.val = TRule, i, size + 1, ind
+        ffi.copy(grammar.p + index + 1, body.p, elemsize * size)
+        ktable, offset = copykeys(ktable, valuetable[body.id])
+        if offset > 0 then
+            correctkeys(grammar, index + 1, offset) -- shift the value indices of the copied rule
+        end
+        index = index + size + 1 -- move to next rule
+    end
+    grammar.p[index].tag = TTrue
+    return ktable
+end
+
+
+-- Check whether the subtree at 'index' contains a repetition (TRep) whose
+-- body passes the PEnullable check, i.e. a potential infinite loop. Returns
+-- true when one is found and no value otherwise. Sub-grammars are skipped
+-- because they were already checked.
+local function checkloops(tree, index)
+    local node = tree.p[index]
+    local tag = node.tag
+    if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then
+        return true
+    end
+    if tag == TGrammar then
+        return
+    end
+    -- otherwise descend according to the number of children of this tag
+    local ns = numsiblings[tag + 1]
+    if ns == 1 then
+        return checkloops(tree, index + 1)
+    elseif ns == 2 then
+        if checkloops(tree, index + 1) then
+            return true
+        end
+        return checkloops(tree, index + node.ps)
+    end
+    assert(ns == 0)
+end
+
+-- Check whether rule 'rulename' can be left recursive. Returns PEleftrecursion
+-- in that case; otherwise a boolean telling whether the subtree at 'index' is
+-- nullable ('nullable' is the answer handed back by nodes that cannot be passed).
+local function verifyrule(rulename, tree, passed, nullable, index, valuetable)
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
+ return nullable; -- cannot pass from here
+ elseif tag == TTrue or tag == TBehind then
+ return true;
+ elseif tag == TNot or tag == TAnd or tag == TRep then
+ return verifyrule(rulename, tree, passed, true, index + 1, valuetable) -- child is checked with nullable = true
+ elseif tag == TCapture or tag == TRunTime then
+ return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+ elseif tag == TCall then
+ local rule = valuetable[tree.p[index].val]
+ if rule == rulename then return PEleftrecursion end -- reached the rule being verified again
+ if passed[rule] and passed[rule] > MAXRULES then -- this rule was entered more than MAXRULES times: stop following it
+ return nullable
+ end
+ return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) -- NOTE(review): 'ps' of a TCall presumably leads to the called TRule node — confirm in finalfix
+ -- only check 2nd child if first is nullable
+ elseif tag == TSeq then
+ local res = verifyrule(rulename, tree, passed, false, index + 1, valuetable)
+ if res == PEleftrecursion then
+ return res
+ elseif not res then
+ return nullable
+ else
+ return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+ end
+ -- must check both children
+ elseif tag == TChoice then
+ nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+ if nullable == PEleftrecursion then return nullable end
+ return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+ elseif tag == TRule then
+ local rule = valuetable[tree.p[index].val]
+ passed[rule] = (passed[rule] or 0) + 1 -- count how often each rule is entered (compared with MAXRULES above)
+ return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+ elseif tag == TGrammar then
+ return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive
+ else
+ assert(false)
+ end
+end
+
+
+-- Verify a grammar: flag or reject left-recursive rules, then reject rules
+-- that contain a loop over a nullable body. 'valuetable' is the grammar's own
+-- value table (it shadows the outer 'valuetable').
+local function verifygrammar(rule, index, valuetable)
+    -- pass 1: find left-recursive rules
+    local LR = {}
+    local ind = index + 1
+    while rule.p[ind].tag == TRule do
+        local rulename = valuetable[rule.p[ind].val]
+        -- only used rules have a name in the value table
+        if rulename and verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then
+            if not LREnable then
+                error(("rule '%s' may be left recursive"):format(rulename), 0)
+            end
+            LR[rulename] = true
+        end
+        ind = ind + rule.p[ind].ps
+    end
+    assert(rule.p[ind].tag == TTrue)
+
+    -- pass 2: mark the left-recursive rules and the calls that refer to them
+    -- (a TCall only gets its 'cap' set while it is still 0)
+    for i = 0, rule.treesize - 1 do
+        local node = rule.p[i]
+        if node.tag == TRule and LR[valuetable[node.val]] then
+            node.cap = bor(node.cap, RuleLR)
+        elseif node.tag == TCall and LR[valuetable[node.val]] and node.cap == 0 then
+            node.cap = 1
+        end
+    end
+
+    -- pass 3: check infinite loops inside rules
+    ind = index + 1
+    while rule.p[ind].tag == TRule do
+        -- 'val' is a number and 0 is truthy in Lua, so an unused rule has to
+        -- be recognised by comparing with 0, not by a bare truth test
+        if rule.p[ind].val ~= 0 and checkloops(rule, ind + 1) then
+            error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0)
+        end
+        ind = ind + rule.p[ind].ps
+    end
+    assert(rule.p[ind].tag == TTrue)
+end
+
+
+-- Mark the initial rule as used and give it the name 'val' if it is not referenced yet.
+local function initialrulename(grammar, val, valuetable)
+    local first = grammar.p[1]
+    first.cap = bit.bor(first.cap, Ruleused)
+    if first.val ~= 0 then
+        return -- already referenced: it has a value-table entry
+    end
+    local ind = #valuetable + 1
+    assert(ind <= 65536, "too many Lua values in pattern")
+    valuetable[ind] = val
+    first.val = ind
+end
+
+
+function newgrammar(pat)
+    -- Build the tree for the grammar table 'pat':
+    --   1. collect the rules and the position of each rule in the final tree;
+    --   2. copy them behind a TGrammar root and merge their value tables;
+    --   3. run finalfix with the position table, name the initial rule and
+    --      verify the result.
+    -- Returns the new tree.
+    local n, size, rules, postab = collectrules(pat)
+    local grammar = treepattern(size)
+    local root = grammar.p[0]
+    root.tag, root.val = TGrammar, n
+    valuetable[grammar.id] = buildgrammar(grammar, rules, n, 1, valuetable)
+    finalfix(true, postab, grammar, 1, valuetable[grammar.id])
+    initialrulename(grammar, rules[1], valuetable[grammar.id])
+    verifygrammar(grammar, 0, valuetable[grammar.id])
+    return grammar
+end
+
+-- ======================================================
+
+-- Remove duplicates from the value table of pattern 'p'. Values still
+-- referenced from the tree or from the compiled code are renumbered in order
+-- of first use; everything else is dropped from the table.
+local function reducevaluetable(p)
+    local old = valuetable[p.id]
+    local seen = {} -- value -> index in 'compact'
+    local compact = {}
+    -- translate an old index into its new one; indices <= 0 map to 0
+    local function remap(v)
+        if v <= 0 then return 0 end
+        local value = old[v]
+        local ord = seen[value]
+        if not ord then
+            compact[#compact + 1] = value
+            ord = #compact
+            seen[value] = ord
+        end
+        return ord
+    end
+
+    -- renumber every node of the subtree at 'index' that carries a value index
+    local function walk(index)
+        local node = p.p[index]
+        local tag = node.tag
+        if tag == TSet or tag == TCall or tag == TOpenCall or
+            tag == TRule or tag == TCapture or tag == TRunTime then
+            node.val = remap(node.val)
+        end
+        local ns = numsiblings[tag + 1]
+        if ns == 1 then
+            return walk(index + 1)
+        elseif ns == 2 then
+            walk(index + 1)
+            return walk(index + node.ps)
+        end
+        assert(ns == 0)
+    end
+
+    if p.treesize > 0 then walk(0) end
+    if p.code ~= nil then
+        -- the compiled code keeps value indices in different fields per opcode
+        for i = 0, p.code.size - 1 do
+            local inst = p.code.p[i]
+            local op = inst.code
+            if op == ICall or op == IJmp then
+                inst.aux = remap(inst.aux)
+            elseif op == ISet or op == ITestSet or op == ISpan then
+                inst.val = remap(inst.val)
+            elseif op == IOpenCapture or op == IFullCapture then
+                inst.offset = remap(inst.offset)
+            end
+        end
+    end
+    valuetable[p.id] = compact
+end
+
+
+local function checkalt(tree)
+    -- Flag group captures with BCapcandelete. A group capture below a
+    -- TChoice is flagged at once; one found outside any choice is remembered
+    -- per rule and flagged afterwards unless that rule was noted in
+    -- 'outside' (rule number 1, or a rule called from outside a choice).
+    local pending = {} -- rule value -> index of a group capture seen outside a choice
+    local outside = {} -- rule value -> true
+
+    local function iter(index, choice, rule)
+        local node = tree[index]
+        local tag = node.tag
+        if tag == TCapture and bit.band(node.cap, 0xffff) == Cgroup then
+            if choice then
+                node.cap = bit.bor(node.cap, BCapcandelete)
+            elseif rule then
+                pending[rule] = index
+            end
+        elseif tag == TChoice then
+            choice = true
+        elseif tag == TRule then
+            rule = node.val
+            if bit.band(node.cap, 0xffff) == 1 then
+                outside[rule] = true
+            end
+        elseif tag == TCall and not choice then
+            outside[node.val] = true
+        end
+        local sibs = numsiblings[tag + 1] or 0
+        if sibs >= 1 then
+            iter(index + 1, choice, rule)
+            if sibs >= 2 then
+                return iter(index + node.ps, choice, rule)
+            end
+        end
+    end
+
+    iter(0)
+    for rule, index in pairs(pending) do
+        if not outside[rule] then
+            tree[index].cap = bit.bor(tree[index].cap, BCapcandelete)
+        end
+    end
+end
+
+-- Compile pattern 'p' starting at tree index 'index' and return p.code.
+local function prepcompile(p, index)
+ finalfix(false, nil, p, index, valuetable[p.id])
+ checkalt(p.p) -- flags group captures in the tree with BCapcandelete
+ lpcode.compile(p, index, valuetable[p.id])
+ reducevaluetable(p) -- also renumbers the value indices stored inside p.code
+ return p.code
+end
+
+-- Exported as 'ptree': print the tree of 'pat'; a truthy 'c' runs finalfix on it first.
+local function lp_printtree(pat, c)
+ assert(pat.treesize > 0)
+ if c then
+ finalfix(false, nil, pat, 0, valuetable[pat.id])
+ end
+ lpprint.printtree(pat.p, 0, 0, valuetable[pat.id])
+end
+
+-- Exported as 'pcode': print the compiled code of 'pat', compiling it first if needed.
+local function lp_printcode(pat)
+ -- not compiled yet?
+ if pat.code == nil then
+ prepcompile(pat, 0)
+ end
+ lpprint.printpatt(pat.code, valuetable[pat.id])
+end
+
+
+-- Main match function (exported as 'match'): converts 'pat' to a pattern if
+-- needed, compiles it on first use and hands subject 's' to lpvm.match.
+local function lp_match(pat, s, init, ...)
+    local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+    if p.code == nil then p.code = prepcompile(p, 0) end
+    return lpvm.match(p, s, init, valuetable[p.id], ...)
+end
+
+local function lp_streammatch(pat, init, ...)
+    local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+    if p.code == nil then p.code = prepcompile(p, 0) end -- compiled lazily, on first use
+    return lpvm.streammatch(p, init, valuetable[p.id], ...)
+end
+
+-- For testing purposes only: stream emulation through lpvm.emulatestreammatch
+-- (sends all chars of the string 's', one char after another).
+local function lp_emulatestreammatch(pat, s, init, ...)
+    local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+    if p.code == nil then p.code = prepcompile(p, 0) end
+    return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...)
+end
+
+-- {======================================================
+-- Library creation and functions not related to matching
+-- =======================================================
+
+local function lp_setmax(val) -- exported as 'setmaxstack'; forwards 'val' to lpvm.setmax
+ lpvm.setmax(val)
+end
+
+local function lp_setmaxbehind(val) -- exported as 'setmaxbehind'; forwards 'val' to lpvm.setmaxbehind
+ lpvm.setmaxbehind(val)
+end
+
+local function lp_enableleftrecursion(val) -- exported as 'enableleftrecursion'
+ LREnable = val -- read by verifygrammar: when falsy, a left-recursive rule raises an error
+end
+
+local function lp_version() -- exported as 'version'
+ return VERSION
+end
+
+-- Exported as 'type': returns the string "pattern" when 'pat' is a pattern
+-- object and no value at all for anything else.
+local function lp_type(pat)
+    if not ffi.istype(treepattern, pat) then return end
+    return "pattern"
+end
+
+-- Store under tab[catname] the set of all byte values 0..255 for which 'catfce' returns non-zero.
+local function createcat(tab, catname, catfce)
+    local tree, bits = newcharset()
+    for code = 0, 255 do
+        if catfce(code) ~= 0 then
+            bits[rshift(code, 5)] = bor(bits[rshift(code, 5)], lshift(1, band(code, 31)))
+        end
+    end
+    tab[catname] = tree
+end
+
+
+-- Exported as 'locale': fill 'tab' (a new table by default) with one
+-- character set per C character class, built from the is<class> functions
+-- reached through ffi.C, and return it.
+local function lp_locale(tab)
+    tab = tab or {}
+    -- same classes, in the same order, as the C functions isalnum .. isxdigit
+    local classes = { "alnum", "alpha", "cntrl", "digit", "graph", "lower",
+        "print", "punct", "space", "upper", "xdigit" }
+    for i = 1, #classes do
+        local name = classes[i]
+        local isclass = ffi.C["is" .. name]
+        createcat(tab, name, function(c) return isclass(c) end)
+    end
+    return tab
+end
+
+-- __new metamethod: allocate a pattern with 'size' tree nodes and give it the next pattern id.
+local function lp_new(ct, size)
+    local pat = ffi.new(ct, size)
+    local id = patternid + 1
+    patternid = id
+    pat.treesize, pat.id = size, id
+    return pat
+end
+
+-- __gc metamethod: drop the pattern's value table and free its compiled code, if any.
+local function lp_gc(ct)
+    valuetable[ct.id] = nil
+    local code = ct.code
+    if code == nil then return end
+    ffi.C.free(code.p)
+    ffi.C.free(code)
+end
+
+local function lp_eq(ct1, ct2) -- __eq metamethod: equal when both operands have the same tostring() text
+ return tostring(ct1) == tostring(ct2)
+end
+
+local function lp_load(str, fcetab) -- exported as 'load': build a pattern from 'str' through lpvm.load
+ local pat, t = lpvm.load(str, fcetab, true)
+ valuetable[pat.id] = t -- register the value table returned together with the pattern
+ return pat
+end
+
+local function lp_loadfile(fname, fcetab) -- exported as 'loadfile': build a pattern from file 'fname' through lpvm.loadfile
+ local pat, t = lpvm.loadfile(fname, fcetab, true)
+ valuetable[pat.id] = t -- register the value table returned together with the pattern
+ return pat
+end
+
+local function lp_dump(ct, tree)
+    -- Serialise pattern 'ct' into a string: the tree (only when 'tree' is truthy),
+    -- the compiled code, then the value table as type-tagged entries.
+    if ct.code == nil then prepcompile(ct, 0) end -- not compiled yet
+    local out = {}
+    if tree then
+        out[#out + 1] = ffi.string(uint32(ct.treesize), 4)
+        out[#out + 1] = ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize)
+    else
+        out[#out + 1] = ffi.string(uint32(0), 4)
+    end
+    out[#out + 1] = ffi.string(uint32(ct.code.size), 4)
+    out[#out + 1] = ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement))
+    local t = valuetable[ct.id]
+    local len = t and #t or 0
+    out[#out + 1] = ffi.string(uint32(len), 4)
+    local funccount = 0
+    if len > 0 then
+        for _, val in ipairs(t) do
+            local typ = type(val)
+            if typ == 'string' then
+                out[#out + 1] = 'str'
+                out[#out + 1] = ffi.string(uint32(#val), 4)
+                out[#out + 1] = val
+            elseif typ == 'number' then
+                local text = tostring(val)
+                out[#out + 1] = 'num'
+                out[#out + 1] = ffi.string(uint32(#text), 4)
+                out[#out + 1] = text
+            elseif typ == 'cdata' then
+                out[#out + 1] = 'cdt'
+                out[#out + 1] = ffi.string(val, ffi.sizeof(val))
+            elseif typ == 'function' then
+                out[#out + 1] = 'fnc'
+                funccount = funccount + 1
+                local name = funcnames[val] or ('FNAME%03d'):format(funccount)
+                out[#out + 1] = ffi.string(uint32(#name), 4)
+                out[#out + 1] = name
+                if not funcnames[val] and debug.getupvalue(val, 1) then
+                    -- warning only; io.write takes just the text (a second argument would be printed as well)
+                    io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, debug.getupvalue(val, 1), name))
+                end
+                local data = string.dump(val, true)
+                out[#out + 1] = ffi.string(uint32(#data), 4)
+                out[#out + 1] = data
+            else
+                error(("Type '%s' NYI for dump"):format(typ), 0)
+            end
+        end
+    end
+    return table.concat(out)
+end
+
+local function lp_save(ct, fname, tree)
+    local data = lp_dump(ct, tree) -- serialise first: a failing dump must neither truncate 'fname' nor leak a handle
+    local file = assert(io.open(fname, 'wb'))
+    file:write(data); file:close()
+end
+
+-- Public API table: returned by this module and used as __index of every pattern object.
+local pattreg = {
+ ["ptree"] = lp_printtree, -- debugging helpers
+ ["pcode"] = lp_printcode,
+ ["match"] = lp_match, -- matching entry points
+ ["streammatch"] = lp_streammatch,
+ ["emulatestreammatch"] = lp_emulatestreammatch,
+ ["setmaxbehind"] = lp_setmaxbehind,
+ ["B"] = lp_behind, -- pattern constructors and captures
+ ["V"] = lp_V,
+ ["C"] = lp_simplecapture,
+ ["Cc"] = lp_constcapture,
+ ["Cmt"] = lp_matchtime,
+ ["Cb"] = lp_backref,
+ ["Carg"] = lp_argcapture,
+ ["Cp"] = lp_poscapture,
+ ["Cs"] = lp_substcapture,
+ ["Ct"] = lp_tablecapture,
+ ["Cf"] = lp_foldcapture,
+ ["Cg"] = lp_groupcapture,
+ ["P"] = lp_P,
+ ["S"] = lp_set,
+ ["R"] = lp_range,
+ ["L"] = lp_and,
+ ["locale"] = lp_locale,
+ ["version"] = lp_version,
+ ["setmaxstack"] = lp_setmax,
+ ["type"] = lp_type,
+ ["enableleftrecursion"] = lp_enableleftrecursion,
+ ["enablememoization"] = lpvm.enablememoization,
+ ["enabletracing"] = lpvm.enabletracing,
+ ["save"] = lp_save, -- serialisation
+ ["dump"] = lp_dump,
+ ["load"] = lp_load,
+ ["loadfile"] = lp_loadfile,
+ ["__mul"] = lp_seq, -- operator functions, also listed in 'metareg' below
+ ["__add"] = lp_choice,
+ ["__pow"] = lp_star,
+ ["__len"] = lp_and,
+ ["__div"] = lp_divcapture,
+ ["__unm"] = lp_not,
+ ["__sub"] = lp_sub,
+}
+
+local metareg = { -- metatable attached to the 'treepattern' ctype
+ ["__gc"] = lp_gc, -- releases the value table and the compiled code
+ ["__new"] = lp_new, -- constructor: treepattern(size)
+ ["__mul"] = lp_seq, -- p1 * p2
+ ["__add"] = lp_choice, -- p1 + p2
+ ["__pow"] = lp_star, -- p ^ n
+ ["__len"] = lp_and, -- #p
+ ["__div"] = lp_divcapture, -- p / v
+ ["__unm"] = lp_not, -- -p
+ ["__sub"] = lp_sub, -- p1 - p2
+ ["__eq"] = lp_eq,
+ ["__index"] = pattreg -- method lookup, e.g. p:match(...)
+}
+
+ffi.metatype(treepattern, metareg) -- bind the metatable to the ctype
+
+return pattreg -- module value: the public API table
diff --git a/tools/lpeg/lpprint.lua b/tools/lpeg/lpprint.lua
new file mode 100644
index 0000000..86f6897
--- /dev/null
+++ b/tools/lpeg/lpprint.lua
@@ -0,0 +1,356 @@
+--[[
+LPEGLJ
+lpprint.lua
+Tree, code and debug print functions (only for debugging)
+Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua
+Lua.org & PUC-Rio written by Roberto Ierusalimschy
+http://www.inf.puc-rio.br/~roberto/lpeg/
+
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+--]]
+
+local ffi = require"ffi"
+local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
+
+ffi.cdef[[
+ int isprint ( int c );
+]]
+
+local RuleLR = 0x10000 -- flag bit in a TRule node's 'cap' field: printed as "left recursive" when set
+local Ruleused = 0x20000 -- flag bit in a TRule node's 'cap' field: printed as "not used" when clear
+
+-- {======================================================
+-- Printing patterns (for debugging)
+-- =======================================================
+
+local TChar = 0 -- single character; its code is in the node's 'val'
+local TSet = 1 -- character set; valuetable[val] holds the set
+local TAny = 2 -- standard PEG elements
+local TTrue = 3 -- also the sentinel node expected after a grammar's last rule
+local TFalse = 4
+local TRep = 5 -- one sibling
+local TSeq = 6 -- two siblings
+local TChoice = 7 -- two siblings
+local TNot = 8 -- one sibling
+local TAnd = 9 -- one sibling
+local TCall = 10 -- valuetable[val] is the key printed for the called rule
+local TOpenCall = 11 -- valuetable[val] is the key printed for the called rule
+local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
+local TGrammar = 13 -- sib1 is initial (and first) rule
+local TBehind = 14 -- match behind
+local TCapture = 15 -- regular capture
+local TRunTime = 16 -- run-time capture
+
+local IAny = 0 -- if no char, fail
+local IChar = 1 -- if char != val, fail
+local ISet = 2 -- if char not in val, fail
+local ITestAny = 3 -- if no char, jump to 'offset'
+local ITestChar = 4 -- if char != val, jump to 'offset'
+local ITestSet = 5 -- if char not in val, jump to 'offset'
+local ISpan = 6 -- read a span of chars in val
+local IBehind = 7 -- walk back 'val' characters (fail if not possible)
+local IRet = 8 -- return from a rule
+local IEnd = 9 -- end of pattern
+local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
+local IJmp = 11 -- jump to 'offset'
+local ICall = 12 -- call rule at 'offset'
+local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
+local ICommit = 14 -- pop choice and jump to 'offset'
+local IPartialCommit = 15 -- update top choice to current position and jump
+local IBackCommit = 16 -- "fails" but jump to its own 'offset'
+local IFailTwice = 17 -- pop one choice and then fail
+local IFail = 18 -- go back to saved state on choice and jump to saved offset
+local IGiveup = 19 -- internal use
+local IFullCapture = 20 -- complete capture of last 'off' chars
+local IOpenCapture = 21 -- start a capture
+local ICloseCapture = 22 -- end a capture started by IOpenCapture
+local ICloseRunTime = 23 -- end a run-time (match-time) capture
+
+local Cclose = 0 -- capture kinds (Cclose .. Cgroup); their printable names are listed in 'modes'
+local Cposition = 1
+local Cconst = 2
+local Cbackref = 3
+local Carg = 4
+local Csimple = 5
+local Ctable = 6
+local Cfunction = 7
+local Cquery = 8
+local Cstring = 9
+local Cnum = 10
+local Csubst = 11
+local Cfold = 12
+local Cruntime = 13
+local Cgroup = 14
+
+
+-- number of siblings for each tree
+local numsiblings = {} -- tag -> sibling count; tags missing here have none (lookups use 'or 0')
+do
+  -- tags with a single sibling: the subtree that starts right after
+  -- the node itself
+  for _, tag in ipairs { TRep, TNot, TAnd, TGrammar, TBehind, TCapture, TRunTime } do
+    numsiblings[tag] = 1
+  end
+  -- tags with two siblings; the second one starts 'ps' entries after the node
+  for _, tag in ipairs { TSeq, TChoice, TRule } do
+    numsiblings[tag] = 2
+  end
+end
+local names = {} -- opcode -> mnemonic written by printinst
+-- one assignment per opcode, in opcode order
+names[IAny] = "any"
+names[IChar] = "char"
+names[ISet] = "set"
+names[ITestAny] = "testany"
+names[ITestChar] = "testchar"
+names[ITestSet] = "testset"
+names[ISpan] = "span"
+names[IBehind] = "behind"
+names[IRet] = "ret"
+names[IEnd] = "end"
+names[IChoice] = "choice"
+names[IJmp] = "jmp"
+names[ICall] = "call"
+names[IOpenCall] = "open_call"
+names[ICommit] = "commit"
+names[IPartialCommit] = "partial_commit"
+names[IBackCommit] = "back_commit"
+names[IFailTwice] = "failtwice"
+names[IFail] = "fail"
+names[IGiveup] = "giveup"
+names[IFullCapture] = "fullcapture"
+names[IOpenCapture] = "opencapture"
+names[ICloseCapture] = "closecapture"
+names[ICloseRunTime] = "closeruntime"
+
+local function printcharset(st) -- write set 'st' as "[(xx)(xx-yy)...]": hex codes of its members, runs collapsed into ranges
+  io.write("[")
+  local i = 0 -- 'st' is read as 32-bit words: member i is bit (i & 31) of word (i >> 5), i.e. words 0..7
+  while i <= 255 do
+    local first = i
+    while i <= 255 and band(st[rshift(i, 5)], lshift(1, band(i, 31))) ~= 0 do -- bound first: i == 256 would read st[8]
+      i = i + 1
+    end
+    if i - 1 == first then -- unary range?
+      io.write(("(%02x)"):format(first))
+    elseif i - 1 > first then -- non-empty range?
+      io.write(("(%02x-%02x)"):format(first, i - 1))
+    end
+    i = i + 1 -- 'i' is not a member (or is past 255): move on
+  end
+  io.write("]")
+end
+
+local modes = {} -- capture kind -> printable name
+-- one assignment per capture kind, in kind order
+modes[Cclose] = "close"
+modes[Cposition] = "position"
+modes[Cconst] = "constant"
+modes[Cbackref] = "backref"
+modes[Carg] = "argument"
+modes[Csimple] = "simple"
+modes[Ctable] = "table"
+modes[Cfunction] = "function"
+modes[Cquery] = "query"
+modes[Cstring] = "string"
+modes[Cnum] = "num"
+modes[Csubst] = "substitution"
+modes[Cfold] = "fold"
+modes[Cruntime] = "runtime"
+modes[Cgroup] = "group"
+
+local function printcapkind(kind) -- write the printable name of capture kind 'kind' (looked up in 'modes')
+  io.write(string.format("%s", modes[kind]))
+end
+
+local function printjmp(p, index) -- write "-> N", N being the instruction index the relative 'offset' of p[index] points to
+  io.write(string.format("-> %d", index + p[index].offset))
+end
+
+local function printrulename(p, index, rulenames) -- write the rule name recorded for the jump target of p[index], if any
+  -- 'rulenames' may be nil; then nothing is looked up or written
+  local name = rulenames and rulenames[index + p[index].offset]
+  if name then io.write(' ', name) end
+end
+
+local function printinst(p, index, valuetable, rulenames) -- print instruction p[index]; 'rulenames' (optional): index -> rule name
+  local inst = p[index]
+  local code = inst.code
+  local label = rulenames and rulenames[index]
+  if label then io.write(label, '\n') end -- a named rule starts here: its name goes on a line of its own
+  io.write(("%04d: %s "):format(index, names[code]))
+  if code == IChar or code == ITestChar then
+    io.write(("'%s'"):format(string.char(inst.val)))
+    if code == ITestChar then
+      printjmp(p, index)
+      printrulename(p, index, rulenames)
+    end
+  elseif code == IFullCapture or code == IOpenCapture then
+    printcapkind(band(inst.val, 0x0f)) -- low 4 bits of 'val' select the capture kind
+    local key = tostring(valuetable[inst.offset])
+    if code == IFullCapture then
+      io.write((" (size = %d) (idx = %s)"):format(band(rshift(inst.val, 4), 0xF), key))
+    else
+      io.write((" (idx = %s)"):format(key))
+    end
+  elseif code == ISet or code == ISpan or code == ITestSet then
+    printcharset(valuetable[inst.val])
+    if code == ITestSet then
+      printjmp(p, index)
+      printrulename(p, index, rulenames)
+    end
+  elseif code == IOpenCall then
+    io.write(("-> %d"):format(inst.offset))
+  elseif code == IBehind then
+    io.write(("%d"):format(inst.val))
+  elseif code == IJmp or code == ICall or code == ICommit or code == IChoice or
+      code == IPartialCommit or code == IBackCommit or code == ITestAny then
+    printjmp(p, index)
+    if (code == ICall or code == IJmp) and inst.aux > 0 then
+      io.write(' ', valuetable[inst.aux])
+    else
+      printrulename(p, index, rulenames)
+    end
+  end
+  io.write("\n")
+end
+
+
+local function printpatt(p, valuetable) -- print all p.size instructions of compiled pattern 'p' (instruction array: p.p)
+  local code, labels = p.p, {} -- labels: instruction index -> rule name
+  -- pass 1: a call/jump with aux > 0 names the instruction it targets
+  for i = 0, p.size - 1 do
+    local inst = code[i]
+    if (inst.code == ICall or inst.code == IJmp) and inst.aux > 0 then
+      labels[i + inst.offset] = valuetable[inst.aux]
+    end
+  end
+  for i = 0, p.size - 1 do -- pass 2: print each instruction
+    printinst(code, i, valuetable, labels)
+  end
+end
+
+
+local function printcap(cap, index, valuetable) -- print kind, value-table entry, size and subject position of capture cap[index]
+  printcapkind(cap[index].kind) -- tostring below: the entry may be nil or a non-string, which '%s' can reject
+  io.write((" (idx: %s - size: %d) -> %d\n"):format(tostring(valuetable[cap[index].idx]), cap[index].siz, cap[index].s))
+end
+
+
+local function printcaplist(cap, limit, valuetable) -- print captures cap[0 .. limit-1] between two ruler lines
+  io.write(">======\n")
+  -- stops at 'limit', or earlier at an entry whose 's' field is nil/false
+  for index = 0, math.huge do
+    if not cap[index].s or index >= limit then break end
+    printcap(cap, index, valuetable)
+  end
+  io.write("=======\n")
+end
+
+-- ======================================================
+
+
+
+-- {======================================================
+-- Printing trees (for debugging)
+-- =======================================================
+
+local tagnames = {} -- tree tag -> name written by printtree
+-- one assignment per tag, in tag order
+tagnames[TChar] = "char"
+tagnames[TSet] = "set"
+tagnames[TAny] = "any"
+tagnames[TTrue] = "true"
+tagnames[TFalse] = "false"
+tagnames[TRep] = "rep"
+tagnames[TSeq] = "seq"
+tagnames[TChoice] = "choice"
+tagnames[TNot] = "not"
+tagnames[TAnd] = "and"
+tagnames[TCall] = "call"
+tagnames[TOpenCall] = "opencall"
+tagnames[TRule] = "rule"
+tagnames[TGrammar] = "grammar"
+tagnames[TBehind] = "behind"
+tagnames[TCapture] = "capture"
+tagnames[TRunTime] = "run-time"
+
+
+local function printtree(tree, ident, index, valuetable)
+  -- Print the subtree rooted at tree[index], one node per line, indented by 'ident' spaces
+  -- (children are printed with ident + 2). 'valuetable' resolves the 'val' of sets, calls, captures and rules.
+  io.write((" "):rep(ident))
+  local tag = tree[index].tag
+  io.write(("%s"):format(tagnames[tag]))
+  if tag == TChar then
+    local c = tree[index].val
+    if ffi.C.isprint(c) ~= 0 then -- isprint returns an int, and 0 is truthy in Lua: compare explicitly
+      io.write((" '%c'\n"):format(c))
+    else
+      io.write((" (%02X)\n"):format(c))
+    end
+  elseif tag == TSet then
+    printcharset(valuetable[tree[index].val])
+    io.write("\n")
+  elseif tag == TOpenCall or tag == TCall then
+    io.write((" key: %s\n"):format(tostring(valuetable[tree[index].val])))
+  elseif tag == TBehind then
+    io.write((" %d\n"):format(tree[index].val))
+    printtree(tree, ident + 2, index + 1, valuetable)
+  elseif tag == TCapture then -- tostring: the looked-up values may be nil or non-strings, which '%s' can reject
+    io.write((" cap: %s n: %s\n"):format(tostring(modes[band(tree[index].cap, 0xffff)]), tostring(valuetable[tree[index].val])))
+    printtree(tree, ident + 2, index + 1, valuetable)
+  elseif tag == TRule then
+    local extra = band(tree[index].cap, RuleLR) == RuleLR and ' left recursive' or ''
+    extra = extra .. (band(tree[index].cap, Ruleused) ~= Ruleused and ' not used' or '')
+    io.write((" n: %d key: %s%s\n"):format(band(tree[index].cap, 0xffff) - 1, tostring(valuetable[tree[index].val]), extra))
+    printtree(tree, ident + 2, index + 1, valuetable)
+    -- do not print next rule as a sibling
+  elseif tag == TGrammar then
+    local ruleindex = index + 1
+    io.write((" %d\n"):format(tree[index].val)) -- number of rules
+    for i = 1, tree[index].val do
+      printtree(tree, ident + 2, ruleindex, valuetable)
+      ruleindex = ruleindex + tree[ruleindex].ps -- a rule's 'ps' leads to the next rule
+    end
+    assert(tree[ruleindex].tag == TTrue) -- sentinel
+  else
+    local sibs = numsiblings[tree[index].tag] or 0
+    io.write("\n")
+    if sibs >= 1 then
+      printtree(tree, ident + 2, index + 1, valuetable)
+      if sibs >= 2 then
+        printtree(tree, ident + 2, index + tree[index].ps, valuetable)
+      end
+    end
+  end
+end
+
+-- }======================================================
+
+return { -- module exports: the debugging printers defined above
+  printinst = printinst,
+  printcaplist = printcaplist,
+  printpatt = printpatt,
+  printtree = printtree,
+}
\ No newline at end of file
diff --git a/tools/lpeg/lpvm.lua b/tools/lpeg/lpvm.lua
new file mode 100644
index 0000000..1a86dc4
--- /dev/null
+++ b/tools/lpeg/lpvm.lua
@@ -0,0 +1,1041 @@
+--[[
+LPEGLJ
+lpvm.lua
+Virtual machine
+Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua
+Lua.org & PUC-Rio written by Roberto Ierusalimschy
+http://www.inf.puc-rio.br/~roberto/lpeg/
+
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+--]]
+
+local ffi = require "ffi"
+local lpcap = require "lpcap"
+--[[ Only for debug purpose
+local lpprint = require"lpprint"
+--]]
+
+local band, rshift, lshift = bit.band, bit.rshift, bit.lshift
+
+-- {======================================================
+-- Virtual Machine
+-- =======================================================
+
+-- Interpret the result of a dynamic capture: false -> fail;
+-- true -> keep current position; number -> next position.
+-- Return new subject position. 'fr' is the first value returned by
+-- the capture function; 'curr' is current subject position; 'limit'
+-- is the largest valid position (nil when matching a stream).
+
+local MAXBEHINDPREDICATE = 255 -- max behind for Look-behind predicate
+local MAXOFF = 0xF -- maximum for full capture
+local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF) -- maximum before current pos
+local INITBACK = 400 -- default maximum size for call/backtrack stack
+
+local IAny = 0 -- if no char, fail
+local IChar = 1 -- if char != val, fail
+local ISet = 2 -- if char not in val, fail
+local ITestAny = 3 -- if no char, jump to 'offset'
+local ITestChar = 4 -- if char != val, jump to 'offset'
+local ITestSet = 5 -- if char not in val, jump to 'offset'
+local ISpan = 6 -- read a span of chars in val
+local IBehind = 7 -- walk back 'val' characters (fail if not possible)
+local IRet = 8 -- return from a rule
+local IEnd = 9 -- end of pattern
+local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
+local IJmp = 11 -- jump to 'offset'
+local ICall = 12 -- call rule at 'offset'
+local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
+local ICommit = 14 -- pop choice and jump to 'offset'
+local IPartialCommit = 15 -- update top choice to current position and jump
+local IBackCommit = 16 -- "fails" but jump to its own 'offset'
+local IFailTwice = 17 -- pop one choice and then fail
+local IFail = 18 -- go back to saved state on choice and jump to saved offset
+local IGiveup = 19 -- internal use
+local IFullCapture = 20 -- complete capture of last 'off' chars
+local IOpenCapture = 21 -- start a capture
+local ICloseCapture = 22 -- close the last capture (turned into a full capture when it is still open and short enough)
+local ICloseRunTime = 23 -- run a match-time capture through lpcap.runtimecap and add the values it returns
+
+local Cclose = 0 -- capture kinds (Cclose .. Cgroup), stored in the 'kind' field of a CAPTURE entry
+local Cposition = 1
+local Cconst = 2
+local Cbackref = 3
+local Carg = 4
+local Csimple = 5
+local Ctable = 6
+local Cfunction = 7
+local Cquery = 8
+local Cstring = 9
+local Cnum = 10
+local Csubst = 11
+local Cfold = 12
+local Cruntime = 13
+local Cgroup = 14
+
+local BCapcandelete = 0x30000 -- mask tested on a capture instruction's 'val' to set the entry's 'candelete' field
+local maxstack = INITBACK -- upper bound for the backtrack stack; growing past it raises "backtrack stack overflow"
+local maxcapturedefault = 100 -- initial length of a CAPTURE array (doubled on demand)
+local maxmemo = 1000 -- number of newly memoized calls after which the two memo tables are rotated
+local usememoization = false -- when true, results of rule calls are stored in / served from the memo tables
+local trace = false -- when true, rule enter/leave/match lines are written with io.write
+
+local FAIL = -1 -- value of 'p' (and of a memo entry) meaning the match failed
+local LRFAIL = -1 -- STACK 'X' marker pushed by the left-recursive call path; same numeric value as FAIL
+local VOID = -2 -- STACK 'memos' value marking the entry's memo as invalidated
+local CHOICE = -3 -- STACK 'X' marker of a choice (backtrack) entry
+local CALL = -4 -- STACK 'X' marker of an ordinary rule call
+
+ffi.cdef [[
+typedef struct {
+ int code;
+ int val;
+ int offset;
+ int aux;
+ } PATTERN_ELEMENT;
+typedef struct {
+ int allocsize;
+ int size;
+ PATTERN_ELEMENT *p;
+ } PATTERN;
+typedef struct {
+ int tag;
+ int val;
+ int ps;
+ int cap;
+ } TREEPATTERN_ELEMENT;
+typedef struct {
+ int id;
+ int treesize;
+ PATTERN *code;
+ TREEPATTERN_ELEMENT p[?];
+ } TREEPATTERN;
+
+typedef struct {
+ double s;
+ double X;
+ double memos;
+ int p;
+ int caplevel;
+ int pA;
+ int valuetabletop;
+ } STACK;
+
+typedef struct {
+ double s;
+ int siz;
+ int idx;
+ int kind;
+ int candelete;
+ } CAPTURE;
+
+void *malloc( size_t size );
+void free( void *memblock );
+void *realloc( void *memblock, size_t size );
+]]
+
+local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT') -- ctype objects for the structs declared in the ffi.cdef above
+local treepattern = ffi.typeof('TREEPATTERN')
+local patternelement = ffi.typeof('PATTERN_ELEMENT')
+local pattern = ffi.typeof('PATTERN')
+local settype = ffi.typeof('int32_t[8]') -- 8 x 32 bits; presumably the 256-bit character-set bitmap (TODO confirm)
+
+local function resdyncaptures(fr, curr, limit, checkstreamlen)
+  -- Interpret 'fr', the first value returned by a match-time capture, and return the new
+  -- subject position, or FAIL. 'curr' is the current position; 'limit' is the largest valid
+  -- position, or nil in stream mode, where 'checkstreamlen' (if given) validates the position.
+  if not fr then
+    return FAIL -- false/nil: the capture rejected the match
+  elseif fr == true then
+    return curr -- true: keep current position
+  end
+  local res = tonumber(fr) -- new position; nil when 'fr' is neither a number nor a numeric string
+  -- a non-numeric result used to die with "attempt to compare ..."; report it as an invalid position
+  if not res or res < curr or (limit and res > limit)
+      or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then
+    error("invalid position returned by match-time capture", 0)
+  end
+  return res
+end
+
+
+-- Add capture values returned by a dynamic capture to the capture list
+-- 'base', nested inside a group capture. 'fd' holds the function's results:
+-- fd[1] is the position result, fd[2..n+1] are the 'n' capture values (n >= 1).
+
+local function adddyncaptures(s, base, index, n, fd, valuetable) -- append 'n' runtime captures plus a close entry after base[index]
+ -- the open Cgroup capture must already be at base[index]
+ assert(base[index].kind == Cgroup and base[index].siz == 0)
+ base[index].idx = 0 -- make it an anonymous group
+ base[index + 1] = {} -- reset the next slot before filling it (NOTE(review): presumably zero-fills the FFI struct; confirm)
+ -- add runtime captures
+ for i = 1, n do
+ base[index + i].kind = Cruntime
+ base[index + i].siz = 1 -- mark it as closed
+ local ind = #valuetable + 1 -- the value is appended to 'valuetable'
+ valuetable[ind] = fd[i + 1] -- fd[1] is skipped: values are taken from fd[2] onwards
+ base[index + i].idx = ind -- index of the capture value in 'valuetable'
+ base[index + i].s = s -- every added capture is positioned at 's'
+ base[index + i + 1] = {} -- reset the following slot for the next iteration / the close entry
+ end
+ base[index + n + 1].kind = Cclose -- close group
+ base[index + n + 1].siz = 1
+ base[index + n + 1].s = s
+ base[index + n + 2] = {} -- leave a reset slot after the close entry
+end
+
+
+-- Opcode interpreter
+
+local function match(stream, last, o, s, op, valuetable, ...)
+ local arg = { ... }
+ local argcount = select('#', ...)
+ local len = #o
+ local ptr = ffi.cast('const unsigned char*', o)
+ s = s - 1
+ local stackptr = 0 -- point to first empty slot in stack
+ local captop = 0 -- point to first empty slot in captures
+ local STACK = ffi.new("STACK[?]", INITBACK)
+ local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
+ local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } }
+ local capturestackptr = #CAPTURESTACK
+ local maxcapture = maxcapturedefault
+ local stacklimit = INITBACK
+ local L = {}
+ local Memo1, Memo2 = {}, {}
+ local memoind = 0
+ local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2))
+ local nocapturereleased = true
+
+ local p = 0 -- current instruction
+ local streambufsize = 2 ^ 8
+ local streambufsizemask = streambufsize - 1 -- faster modulo
+ local streambufs = {}
+ local streambufoffset = 0
+ local streamstartbuffer = 0
+ local streambufferscount = 0
+ local level = -1
+
+ local function deletestreambuffers()
+ local min = s
+ for i = stackptr - 1, 0, -1 do
+ local val = STACK[i].s
+ if val >= 0 then
+ min = math.min(val, min)
+ end
+ end
+
+ for i = captop - 1, 0, -1 do
+ local val = CAPTURE[i].s
+ if val >= 0 then
+ min = math.min(val, min)
+ end
+ end
+ for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do
+ -- max behind for full capture and max behind for Look-behind predicate
+ if i + streambufsize + MAXBEHIND < min then
+ streambufs[i] = nil
+ streambufferscount = streambufferscount - 1
+ else
+ streamstartbuffer = i - 1
+ break
+ end
+ end
+ end
+
+ local function addstreamdata(s, last)
+ local len = #s
+ local srcoffset = 0
+ if streambufferscount > 128 then
+ deletestreambuffers()
+ end
+ repeat
+ local offset = bit.band(streambufoffset, streambufsizemask)
+ if offset > 0 then
+ local index = streambufoffset - offset + 1
+ local count = math.min(len, streambufsize - offset)
+ ffi.copy(streambufs[index] + offset, s:sub(srcoffset + 1, srcoffset + 1 + count), count)
+ len = len - count
+ srcoffset = srcoffset + count
+ streambufoffset = streambufoffset + count
+ end
+ if len > 0 then
+ local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1
+ local buf = ffi.new('unsigned char[?]', streambufsize)
+ streambufferscount = streambufferscount + 1
+ streambufs[index] = buf
+ local count = math.min(len, streambufsize)
+ ffi.copy(buf, s:sub(srcoffset + 1, srcoffset + 1 + count), count)
+ len = len - count
+ srcoffset = srcoffset + count
+ streambufoffset = streambufoffset + count
+ end
+ if streambufoffset >= 2 ^ 47 then
+ error("too big input stream", 0)
+ end
+ until len == 0
+ end
+
+ local function getstreamchar(s)
+ local offset = bit.band(s, streambufsizemask)
+ local index = s - offset + 1
+ return streambufs[index][offset]
+ end
+
+ local checkstreamlen
+
+ local function getstreamstring(st, en)
+ -- TODO Optimalize access
+ local str = {}
+ local i = st >= 0 and st or 1
+ local to = en >= 0 and en or math.huge
+ while true do
+ if i > to then break end
+ if not checkstreamlen(i - 1) then return end
+ if last and (st < 0 or en < 0) then
+ for j = i, streambufoffset do
+ str[#str + 1] = string.char(getstreamchar(j - 1))
+ end
+ en = en < 0 and streambufoffset + en + 1 or en
+ en = st > 0 and en - st + 1 or en
+ st = st < 0 and streambufoffset + st + 1 or 1
+ return table.concat(str):sub(st, en)
+ else
+ str[#str + 1] = string.char(getstreamchar(i - 1))
+ i = i + 1
+ end
+ end
+ return table.concat(str)
+ end
+
+ function checkstreamlen(index)
+ local str
+ while true do
+ if index < streambufoffset then
+ return true
+ else
+ if last then
+ s = streambufoffset
+ return false
+ end
+ local max = captop
+ for i = stackptr - 1, 0, -1 do
+ local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1
+ if val >= 0 then
+ max = math.min(val, max)
+ end
+ end
+ local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount))
+ if n > 0 then
+ for i = stackptr - 1, 0, -1 do
+ local val = STACK[i].caplevel
+ if val > 0 then
+ STACK[i].caplevel = STACK[i].caplevel - n
+ end
+ end
+ captop = captop - n
+ end
+ if outindex > 0 then
+ nocapturereleased = false
+ end
+ str, last = coroutine.yield(1, unpack(out, 1, outindex))
+ addstreamdata(str)
+ end
+ end
+ end
+
+ local function doublecapture()
+ maxcapture = maxcapture * 2
+ local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture)
+ ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop)
+ CAPTURE = NEWCAPTURE
+ CAPTURESTACK[capturestackptr].capture = CAPTURE
+ CAPTURESTACK[capturestackptr].maxcapture = maxcapture
+ end
+
+ local function pushcapture()
+ CAPTURE[captop].idx = op.p[p].offset
+ CAPTURE[captop].kind = band(op.p[p].val, 0x0f)
+ CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0
+ captop = captop + 1
+ p = p + 1
+ if captop >= maxcapture then
+ doublecapture()
+ end
+ end
+
+ local function traceenter(typ, par)
+ level = level + (par or 0)
+ io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux]))
+ end
+
+ local function traceleave(inst)
+ io.write(('%s- %s\n'):format((' '):rep(level), valuetable[op.p[inst].aux]))
+ level = level - 1
+ end
+
+ local function tracematch(typ, start, par, from, to, inst, extra, ...)
+ local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...)
+ local capstr = {}
+ for i = 1, capscount do capstr[i] = tostring(caps[i]) end
+ extra = extra and '(' .. extra .. ')' or ''
+ io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra,
+ o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " ")))
+ level = level - par
+ end
+
+ local function fail()
+ -- pattern failed: try to backtrack
+ local X
+ repeat -- remove pending calls
+ stackptr = stackptr - 1
+ if stackptr == -1 then
+ p = FAIL
+ return
+ end
+ s = STACK[stackptr].s
+ X = STACK[stackptr].X
+ if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then
+ Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
+ Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL
+ end
+ -- lvar.2 rest
+ if X == LRFAIL then
+ CAPTURESTACK[capturestackptr] = nil
+ capturestackptr = capturestackptr - 1
+ CAPTURE = CAPTURESTACK[capturestackptr].capture
+ maxcapture = CAPTURESTACK[capturestackptr].maxcapture
+ L[STACK[stackptr].pA + s * maxpointer] = nil
+ end
+ if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end
+ until X == CHOICE or X >= 0
+ p = STACK[stackptr].p
+ for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
+ table.remove(valuetable)
+ end
+ -- inc.2
+ if X >= 0 then
+ s = X
+ capturestackptr = capturestackptr - 1
+ CAPTURE = CAPTURESTACK[capturestackptr].capture
+ captop = CAPTURESTACK[capturestackptr].captop
+ maxcapture = CAPTURESTACK[capturestackptr].maxcapture
+ local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit
+ while captop + capture.captop >= maxcapture do
+ doublecapture()
+ end
+ ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
+ captop = captop + capture.captop
+ if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end
+ CAPTURESTACK[capturestackptr + 1] = nil
+ L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
+ else
+ captop = STACK[stackptr].caplevel
+ end
+ end
+
+ local function doublestack()
+ if stackptr >= maxstack then
+ error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0)
+ end
+ stacklimit = stacklimit * 2
+ stacklimit = (stacklimit > maxstack) and maxstack or stacklimit
+ local NEWSTACK = ffi.new("STACK[?]", stacklimit)
+ ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr)
+ STACK = NEWSTACK
+ end
+
+ if stream then
+ addstreamdata(o)
+ len = nil
+ o = nil
+ ptr = nil
+ end
+ while true do
+ --[[ Only for debug
+ io.write(("s: |%s| stck:%d, caps:%d \n"):format(s + 1, stackptr, captop))
+ if p ~= FAIL then
+ lpprint.printinst(op.p, p, valuetable)
+ lpprint.printcaplist(CAPTURE, captop, valuetable)
+ end
+ --]]
+ if p == FAIL then return -1 end
+ local code = op.p[p].code
+ if code == IEnd then
+ CAPTURE[captop].kind = Cclose
+ CAPTURE[captop].s = -1
+ return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...)
+ elseif code == IRet then
+ if STACK[stackptr - 1].X == CALL then
+ stackptr = stackptr - 1
+ if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) end
+ p = STACK[stackptr].p
+ if usememoization and STACK[stackptr].memos ~= VOID then
+ local dif = captop - STACK[stackptr].caplevel
+ local caps
+ if dif > 0 then
+ caps = ffi.new("CAPTURE[?]", dif)
+ ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE'))
+ end
+ local val = { s, dif, caps }
+ Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
+ Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val
+ end
+ else
+ local X = STACK[stackptr - 1].X
+ -- lvar.1 inc.1
+ if X == LRFAIL or s > X then
+ if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end
+ STACK[stackptr - 1].X = s
+ p = STACK[stackptr - 1].pA
+ s = STACK[stackptr - 1].s
+ local lambda = L[p + s * maxpointer]
+ lambda.level = lambda.level + 1
+ lambda.X = STACK[stackptr - 1].X
+ STACK[stackptr - 1].caplevel = captop
+ STACK[stackptr - 1].valuetabletop = #valuetable
+ CAPTURESTACK[capturestackptr].captop = captop
+ lambda.capturecommit = CAPTURESTACK[capturestackptr]
+ captop = 0
+ CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault)
+ CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault }
+ maxcapture = maxcapturedefault
+ else
+ -- inc.3
+ stackptr = stackptr - 1
+ p = STACK[stackptr].p
+ s = STACK[stackptr].X
+ for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
+ table.remove(valuetable)
+ end
+ local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer]
+ capturestackptr = capturestackptr - 1
+ CAPTURE = CAPTURESTACK[capturestackptr].capture
+ captop = CAPTURESTACK[capturestackptr].captop
+ maxcapture = CAPTURESTACK[capturestackptr].maxcapture
+ local capture = lambda.capturecommit
+ while captop + capture.captop >= maxcapture do
+ doublecapture()
+ end
+ ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
+ captop = captop + capture.captop
+ if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end
+ CAPTURESTACK[capturestackptr + 1] = nil
+ L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil
+ end
+ end
+ elseif code == IBehind then
+ local n = op.p[p].val
+ if n > s then
+ fail()
+ else
+ s = s - n
+ p = p + 1
+ end
+ elseif code == IJmp then
+ if trace and op.p[p].aux ~= 0 then traceenter('TC') end
+ p = p + op.p[p].offset
+ elseif code == IChoice then
+ if stackptr == stacklimit then
+ doublestack()
+ end
+ STACK[stackptr].X = CHOICE
+ STACK[stackptr].p = p + op.p[p].offset
+ STACK[stackptr].s = s
+ STACK[stackptr].caplevel = captop
+ STACK[stackptr].valuetabletop = #valuetable
+ stackptr = stackptr + 1
+ p = p + 1
+ elseif code == ICall then
+ if stackptr == stacklimit then
+ doublestack()
+ end
+ local k = bit.band(op.p[p].val, 0xffff)
+ if k == 0 then
+ local pA = p + op.p[p].offset
+ local memo = Memo1[pA + s * maxpointer]
+ if usememoization and memo then
+ if trace then traceenter('M', 1) end
+ if memo == FAIL then
+ if trace then traceleave(p) end
+ fail()
+ else
+ local dif = memo[2]
+ if dif > 0 then
+ while captop + dif >= maxcapture do
+ doublecapture()
+ end
+ local caps = memo[3]
+ ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE'))
+ captop = captop + dif
+ end
+ if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) end
+ s = memo[1]
+ p = p + 1
+ end
+ else
+ if trace then traceenter('', 1) end
+ STACK[stackptr].X = CALL
+ STACK[stackptr].s = s
+ STACK[stackptr].p = p + 1 -- save return address
+ STACK[stackptr].pA = pA
+ STACK[stackptr].memos = s
+ STACK[stackptr].caplevel = captop
+ stackptr = stackptr + 1
+ p = pA
+ if usememoization and not memo then
+ memoind = memoind + 1
+ if memoind > maxmemo then
+ memoind = 0
+ Memo1 = Memo2
+ Memo2 = {}
+ end
+ end
+ end
+ else
+ local pA = p + op.p[p].offset
+ local X = L[pA + s * maxpointer]
+ -- lvar.1 lvar.2
+ if not X then
+ if trace then traceenter('', 1) end
+ CAPTURESTACK[capturestackptr].captop = captop
+ local capture = ffi.new("CAPTURE[?]", maxcapturedefault)
+ capturestackptr = capturestackptr + 1
+ CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault }
+ CAPTURE = capture
+ maxcapture = maxcapturedefault
+ captop = 0
+ L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 }
+ STACK[stackptr].p = p + 1
+ STACK[stackptr].pA = pA
+ STACK[stackptr].s = s
+ STACK[stackptr].X = LRFAIL
+ stackptr = stackptr + 1
+ p = pA
+ elseif X.X == LRFAIL or k < X.k then
+ -- lvar.3 lvar.5
+ fail()
+ else
+ -- lvar.4
+ local capture = X.capturecommit
+ while captop + capture.captop >= maxcapture do
+ doublecapture()
+ end
+ ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE'))
+ captop = captop + capture.captop
+ p = p + 1
+ s = X.X
+ end
+ end
+ elseif code == ICommit then
+ stackptr = stackptr - 1
+ p = p + op.p[p].offset
+ elseif code == IPartialCommit then
+ STACK[stackptr - 1].s = s
+ STACK[stackptr - 1].caplevel = captop
+ STACK[stackptr - 1].valuetabletop = #valuetable
+ p = p + op.p[p].offset
+ elseif code == IBackCommit then
+ stackptr = stackptr - 1
+ s = STACK[stackptr].s
+ captop = STACK[stackptr].caplevel
+ for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do
+ table.remove(valuetable)
+ end
+ p = p + op.p[p].offset
+ elseif code == IFailTwice then
+ stackptr = stackptr - 1
+ fail()
+ elseif code == IFail then
+ fail()
+ elseif code == ICloseRunTime then
+ -- invalidate memo
+ for i = 0, stackptr - 1 do
+ STACK[i].memos = VOID
+ end
+ local cs = {}
+ cs.s = o
+ cs.stream = getstreamstring
+ cs.ocap = CAPTURE
+ cs.ptop = arg
+ cs.ptopcount = argcount
+ local out = { outindex = 0, out = {} }
+ local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function
+ captop = captop - n
+ local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result
+ -- fail?
+ if res == FAIL then
+ fail()
+ else
+ s = res - 1 -- else update current position
+ n = out.outindex - 1 -- number of new captures
+ -- any new capture?
+ if n > 0 then
+ captop = captop + 1
+ while captop + n + 1 >= maxcapture do
+ doublecapture()
+ end
+ captop = captop + n + 1
+ -- add new captures to 'capture' list
+ adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable)
+ end
+ p = p + 1
+ end
+ elseif code == ICloseCapture then
+ local s1 = s + 1
+ assert(captop > 0)
+ -- if possible, turn capture into a full capture
+ if CAPTURE[captop - 1].siz == 0 and
+ s1 - CAPTURE[captop - 1].s < 255 then
+ CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1
+ p = p + 1
+ else
+ CAPTURE[captop].siz = 1
+ CAPTURE[captop].s = s + 1
+ pushcapture()
+ end
+ elseif code == IOpenCapture then
+ CAPTURE[captop].siz = 0
+ CAPTURE[captop].s = s + 1
+ pushcapture()
+ elseif code == IFullCapture then
+ CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size
+ CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F)
+ pushcapture()
+ -- standard mode
+ elseif o then
+ if code == IAny then
+ if s < len then
+ p = p + 1
+ s = s + 1
+ else
+ fail()
+ end
+ elseif code == ITestAny then
+ if s < len then
+ p = p + 1
+ else
+ p = p + op.p[p].offset
+ end
+ elseif code == IChar then
+ if s < len and ptr[s] == op.p[p].val then
+ p = p + 1
+ s = s + 1
+ else
+ fail()
+ end
+ elseif code == ITestChar then
+ if s < len and ptr[s] == op.p[p].val then
+ p = p + 1
+ else
+ p = p + op.p[p].offset
+ end
+ elseif code == ISet then
+ local c = ptr[s]
+ local set = valuetable[op.p[p].val]
+ if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
+ p = p + 1
+ s = s + 1
+ else
+ fail()
+ end
+ elseif code == ITestSet then
+ local c = ptr[s]
+ local set = valuetable[op.p[p].val]
+ if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
+ p = p + 1
+ else
+ p = p + op.p[p].offset
+ end
+ elseif code == ISpan then
+ while s < len do
+ local c = ptr[s]
+ local set = valuetable[op.p[p].val]
+ if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
+ break
+ end
+ s = s + 1
+ end
+ p = p + 1
+ end
+ else
+ -- stream mode
+ if code == IAny then
+ if checkstreamlen(s) then
+ p = p + 1
+ s = s + 1
+ else
+ fail()
+ end
+ elseif code == ITestAny then
+ if checkstreamlen(s) then
+ p = p + 1
+ else
+ p = p + op.p[p].offset
+ end
+ elseif code == IChar then
+ if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
+ p = p + 1
+ s = s + 1
+ else
+ fail()
+ end
+ elseif code == ITestChar then
+ if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then
+ p = p + 1
+ else
+ p = p + op.p[p].offset
+ end
+ elseif code == ISet then
+ local c = checkstreamlen(s) and getstreamchar(s)
+ local set = valuetable[op.p[p].val]
+ if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
+ p = p + 1
+ s = s + 1
+ else
+ fail()
+ end
+ elseif code == ITestSet then
+ local c = checkstreamlen(s) and getstreamchar(s)
+ local set = valuetable[op.p[p].val]
+ if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then
+ p = p + 1
+ else
+ p = p + op.p[p].offset
+ end
+ elseif code == ISpan then
+ while checkstreamlen(s) do
+ local c = getstreamchar(s)
+ local set = valuetable[op.p[p].val]
+ if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then
+ break
+ end
+ s = s + 1
+ end
+ p = p + 1
+ end
+ end
+ end
+end
+
+local function setmax(val) -- Store val in the module-level maxstack, raised to INITBACK when it is smaller.
+    if val < INITBACK then -- compare before storing: a non-number raises here and leaves maxstack untouched
+        val = INITBACK
+    end
+    maxstack = val
+end
+
+local function setmaxbehind(val) -- Recompute MAXBEHIND from the two module constants and an optional caller value.
+ MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, val or 0) -- a nil val counts as 0, so the result never drops below either constant
+end
+
+local function enablememoization(val) -- Set the module-level usememoization flag that the matcher tests.
+ usememoization = val -- stored as given; the matcher only tests it for truthiness
+end
+
+local function enabletracing(val) -- Set the module-level trace flag that the matcher tests before calling its trace helpers.
+ trace = val -- stored as given; the matcher only tests it for truthiness
+end
+
+-- Convert a user-supplied start index into a 0-based offset in [0, len].
+-- Positive indices are 1-based from the front; zero and negative indices
+-- are added to len (so -1 is the last character and 0 is the end).
+-- Out-of-range indices are clamped rather than rejected.
+--   initposition(5, 3)  --> 2
+--   initposition(5, -1) --> 4
+--   initposition(5, 9)  --> 5
+
+local function initposition(len, pos)
+    local start = pos or 1 -- a missing position means "start of subject"
+    if start > 0 then
+        -- counted from the front: keep it when it lies inside the subject
+        if start <= len then
+            return start - 1 -- shift to 0-based
+        end
+        return len -- past the end: clamp to the subject length
+    end
+    -- zero or negative: counted back from the end
+    if -start <= len then
+        return len + start
+    end
+    return 0 -- further back than the subject is long: clamp to the beginning
+end
+
+local function lp_match(pat, s, init, valuetable, ...)
+    -- Whole-string match: calls match with flags (false, true), the 1-based start derived from init, and drops match's first return value.
+    return select(2, match(false, true, s, initposition(s:len(), init) + 1, pat.code, valuetable, ...))
+end
+
+local function lp_streammatch(pat, init, valuetable, ...)
+    -- Build a stream matcher: returns a coroutine-wrapped function whose first
+    -- call takes (s, last) and runs match with its first flag true, from init (default 1).
+    local nextra, extra = select('#', ...), { ... } -- true count kept so nil extras survive unpack
+    return coroutine.wrap(function(s, last)
+        return match(true, last, s, init or 1, pat.code, valuetable, unpack(extra, 1, nextra))
+    end)
+end
+
+local function retcount(...) -- Return the number of arguments followed by a table holding them.
+ return select('#', ...), { ... } -- the explicit count keeps trailing nils, which # on the table could miss
+end
+
+-- Test helper only: emulates a stream by feeding the subject to a
+-- stream matcher one character at a time and gathering what it returns.
+local function lp_emulatestreammatch(pat, s, init, valuetable, ...)
+    local start = initposition(s:len(), init) + 1
+    local feed = lp_streammatch(pat, start, valuetable, ...)
+    local results, nresults = {}, 0
+    for j = 1, #s do
+        -- the second argument flags the final chunk
+        local nret, chunk = retcount(feed(s:sub(j, j), j == #s))
+        local status = chunk[1]
+        if status == -1 then
+            return -- fail
+        end
+        -- everything after the status value is a result; append it
+        for k = 2, nret do
+            results[nresults + k - 1] = chunk[k]
+        end
+        nresults = nresults + nret - 1
+        if status == 0 then
+            return unpack(results, 1, nresults) -- parsing finished
+        end
+    end
+    -- reached without a 0 / -1 status (e.g. empty subject): one final call
+    -- with the whole subject flagged as last; its status value is dropped
+    return select(2, feed(s, true))
+end
+
+local function lp_load(str, fcetab, usemeta) -- Rebuild a pattern and its value table from the serialized string `str`; returns pat, valuetable.
+    local index = 0 -- byte offset of the next unread field
+    assert(str)
+    local ptr = ffi.cast('const char*', str)
+    local patsize = ffi.cast('uint32_t*', ptr + index)[0] -- first field: number of tree elements
+    index = index + 4
+    local len = ffi.sizeof(treepatternelement) * patsize -- byte size of the tree section
+
+    local pat
+    if usemeta then
+        pat = treepattern(patsize) -- usemeta: build the object through the ctype constructor
+    else
+        pat = ffi.gc(ffi.cast('TREEPATTERN*', ffi.C.malloc(ffi.sizeof(treepattern, patsize))), -- otherwise: raw malloc plus a finalizer
+            function(ct)
+                if ct.code ~= nil then
+                    ffi.C.free(ct.code.p)
+                    ffi.C.free(ct.code)
+                end
+                ffi.C.free(ct)
+            end)
+        ffi.fill(pat, ffi.sizeof(treepattern, patsize)) -- zero the block so the finalizer sees a NULL code pointer until one is set
+        pat.treesize = patsize
+        pat.id = 0
+    end
+    ffi.copy(pat.p, ptr + index, len) -- tree elements
+    index = index + len
+    if usemeta then
+        pat.code = pattern()
+    else
+        pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
+        assert(pat.code ~= nil)
+        pat.code.allocsize = 10 -- provisional; resized to the real instruction count at the end
+        pat.code.size = 0
+        pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize)
+        assert(pat.code.p ~= nil)
+        ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
+    end
+    pat.code.size = ffi.cast('uint32_t*', ptr + index)[0] -- next field: number of instructions
+    index = index + 4
+    local len = pat.code.size * ffi.sizeof(patternelement)
+    local data = ffi.string(ptr + index, len) -- instruction bytes, copied into the code buffer once it has been resized
+    index = index + len
+    local count = ffi.cast('uint32_t*', ptr + index)[0] -- next field: number of value-table entries
+    index = index + 4
+    local valuetable = {}
+    for i = 1, count do
+        local tag = ffi.string(ptr + index, 3) -- each entry starts with a 3-byte type tag; NOTE(review): an unknown tag is skipped without consuming its payload
+        index = index + 3
+        --string
+        if tag == 'str' then
+            local len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local val = ffi.string(ptr + index, len)
+            index = index + len
+            valuetable[#valuetable + 1] = val
+        elseif tag == 'num' then
+            --number
+            local len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local val = ffi.string(ptr + index, len)
+            index = index + len
+            valuetable[#valuetable + 1] = assert(tonumber(val), 'malformed number in serialized pattern') -- storing nil would silently shift every later index
+        elseif tag == 'cdt' then
+            --ctype
+            local val = settype()
+            ffi.copy(val, ptr + index, ffi.sizeof(settype))
+            index = index + ffi.sizeof(settype)
+            valuetable[#valuetable + 1] = val
+        elseif tag == 'fnc' then
+            --function
+            local len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local fname = ffi.string(ptr + index, len)
+            index = index + len
+            len = ffi.cast('uint32_t*', ptr + index)[0]
+            index = index + 4
+            local val = ffi.string(ptr + index, len)
+            index = index + len
+            if fcetab and fcetab[fname] then -- a caller-supplied function takes precedence over the embedded one
+                assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname))
+                valuetable[#valuetable + 1] = fcetab[fname]
+            else
+                valuetable[#valuetable + 1] = assert(loadstring(val)) -- fail loudly instead of dropping the entry; NOTE(review): this loads code from the blob, so only load trusted data
+            end
+        end
+    end
+    pat.code.allocsize = pat.code.size
+    pat.code.p = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize)
+    assert(pat.code.p ~= nil)
+    ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize)
+    return pat, valuetable
+end
+
+local function lp_loadfile(fname, fcetab, usemeta) -- Read file `fname` and rebuild its pattern with lp_load; returns pat, valuetable.
+    local file = assert(io.open(fname, 'rb')) -- binary mode: the contents are copied byte for byte into C structures
+    local data, err = file:read("*a")
+    file:close() -- close before anything can raise, so a failed read or load no longer leaks the handle
+    return lp_load(assert(data, err), fcetab, usemeta)
+end
+
+-- ======================================================
+
+return { -- public interface of this module
+ match = lp_match, -- match a whole string
+ streammatch = lp_streammatch, -- create a coroutine-based stream matcher
+ emulatestreammatch = lp_emulatestreammatch, -- testing aid: feeds a string one character at a time
+ load = lp_load, -- rebuild a pattern from a serialized string
+ loadfile = lp_loadfile, -- rebuild a pattern from a file
+ setmax = setmax, -- set maxstack (not below INITBACK)
+ setmaxbehind = setmaxbehind, -- recompute MAXBEHIND
+ enablememoization = enablememoization, -- set the usememoization flag
+ enabletracing = enabletracing -- set the trace flag
+}
diff --git a/tools/lpeg/re.lua b/tools/lpeg/re.lua
new file mode 100644
index 0000000..3d00232
--- /dev/null
+++ b/tools/lpeg/re.lua
@@ -0,0 +1,286 @@
+-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $
+-- 2014/08/15 changes rostislav
+
+-- imported functions and modules ('type' is used by equalcap, which runs after globals are cut off on Lua 5.2)
+local tonumber, type, print, error = tonumber, type, print, error
+local setmetatable = setmetatable
+local m = require"lpeglj"
+
+-- 'm' will be used to parse expressions, and 'mm' will be used to
+-- create expressions; that is, 're' runs on 'm', creating patterns
+-- on 'mm'
+local mm = m -- both names refer to the same lpeglj module here
+
+-- pattern's metatable
+local mt = getmetatable(mm.P(0))
+mt = m.version() == "1.0.0.0LJ" and m or mt -- for version "1.0.0.0LJ" the module table is used in place of the metatable; NOTE(review): presumably it exposes __add, __mul, ... itself - confirm
+
+
+
+-- No more global accesses after this point
+local version = _VERSION
+if version == "Lua 5.2" then _ENV = nil end -- on Lua 5.2 this removes the environment, so any later global read raises
+
+
+local any = m.P(1) -- matches any single character
+
+
+-- Pre-defined names
+local Predef = { nl = m.P"\n" } -- names usable as %name in patterns; updatelocale() adds the locale classes
+
+
+local mem -- cache of compiled patterns used by match, streammatch and emulatestreammatch
+local fmem -- cache of the search patterns built by find
+local gmem -- cache of the substitution patterns built by gsub, keyed by pattern and then by replacement
+
+
+local function updatelocale ()
+  -- Re-read the locale classes into Predef, rebuild the one-letter
+  -- shorthands, and start over with empty pattern caches.
+  -- Runs once when the module is loaded and is also exported, so it
+  -- can be called again later.
+  mm.locale(Predef)
+  -- Class names and their shorthand letters, paired by position.
+  local classes = {
+    "alpha", "cntrl", "digit", "graph", "lower",
+    "punct", "space", "upper", "alnum", "xdigit",
+  }
+  local letters = "acdglpsuwx"
+  -- Lowercase letter: alias for the class itself.
+  for idx = 1, #classes do
+    Predef[letters:sub(idx, idx)] = Predef[classes[idx]]
+  end
+  -- Uppercase letter: any single character outside the class.
+  for idx = 1, #classes do
+    local letter = letters:sub(idx, idx)
+    Predef[letter:upper()] = any - Predef[letter]
+  end
+  -- Restart memoization. The caches hold their values weakly, so a
+  -- compiled pattern can be collected once nothing else refers to it.
+  -- All three caches share one metatable, which is created anew on
+  -- every call.
+  local weak = {__mode = "v"}
+  mem = setmetatable({}, weak)
+  fmem = setmetatable({}, weak)
+  gmem = setmetatable({}, weak)
+end
+
+
+updatelocale()
+
+
+
+local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) -- debugging aid: prints the current position and the subject text before it, then returns the same position; not referenced elsewhere in this file
+
+
+local function getdef (id, defs)
+  local found = defs and defs[id] -- defs may be nil when the caller supplied no definitions table
+  if found then return found end
+  error("undefined name: " .. id)
+end
+
+
+local function patt_error (s, i)
+  -- Raise a "pattern error" quoting up to 21 characters of s from position i.
+  local near = s:sub(i, i + 20) -- sub clamps at the end of s, so a short tail comes back whole
+  if not (#s < i + 20) then near = near .. "..." end -- the quote reaches index i + 20: mark it as cut
+  error(("pattern error near '%s'"):format(near), 2)
+end
+
+local function mult (p, n) -- Build the pattern p repeated n times, by binary exponentiation over concatenation.
+ local np = mm.P(true) -- accumulator, starting from the pattern that always matches
+ while n >= 1 do
+ if n%2 >= 1 then np = np * p end -- this binary digit of n is set: append the current power of p
+ p = p * p -- next power: twice as many repetitions
+ n = n/2 -- halved without flooring; the ">= 1" tests ignore the fractional part
+ end
+ return np
+end
+
+local function equalcap (s, i, c)
+  if type(c) ~= "string" then return nil end -- only string captures can be compared with the subject
+  local stop = i + #c -- position just past the candidate text
+  local seen
+  if type(s) == 'function' then seen = s(i, stop - 1) -- stream mode: the subject is an accessor function
+  else seen = s:sub(i, stop - 1) end
+  if seen == c then return stop end
+  return nil
+end
+
+
+local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 -- skippable text: whitespace and "--" comments running to the end of the line
+
+local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0 -- identifier: a letter or "_" followed by letters, digits or "_"
+
+local arrow = S * "<-" -- the "<-" of a rule definition, with optional leading space
+
+local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1 -- what may legally follow a sequence: a closing token, the start of the next rule, or end of input
+
+name = m.C(name) -- from here on, 'name' also captures the identifier text
+
+
+-- a defined name only has meaning in a given environment
+local Def = name * m.Carg(1) -- the name plus the first extra match argument (compile passes its defs there)
+
+local num = m.C(m.R"09"^1) * S / tonumber -- decimal digits, converted to a number
+
+local String = "'" * m.C((any - "'")^0) * "'" + -- quoted literal; captures the text between the quotes, no escape handling
+ '"' * m.C((any - '"')^0) * '"'
+
+
+local defined = "%" * Def / function (c,Defs) -- "%name": look the name up at compile time
+ local cat = Defs and Defs[c] or Predef[c] -- user definitions take precedence over the predefined names
+ if not cat then error ("name '" .. c .. "' undefined") end
+ return cat
+end
+
+local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R -- "x-y": the "-" is substituted away and the two-character string goes to mm.R
+
+local item = defined + Range + m.C(any) -- one element of a character class
+
+local Class =
+ "["
+ * (m.C(m.P"^"^-1)) -- optional complement symbol
+ * m.Cf(item * (item - "]")^0, mt.__add) / -- fold the items into one choice; the first item may itself be "]"
+ function (c, p) return c == "^" and any - p or p end -- "^" captured: match any character outside the class
+ * "]"
+
+local function adddef (t, k, exp)
+  -- Add rule `exp` under name `k` to grammar table `t` and return `t`,
+  -- so the function can serve as the folding step over definitions.
+  -- A name that is already present is rejected instead of overwritten.
+  if t[k] then error("'"..k.."' already defined as a rule") end
+  t[k] = exp
+  return t
+end
+
+local function firstdef (n, r) return adddef({n}, n, r) end -- start a new grammar table: the first rule's name goes in slot 1 and the rule itself under that name
+
+
+local function NT (n, b, p)
+  -- Rule reference: b is the "inside a grammar" flag, p an optional number (0 when absent).
+  if b then return mm.V(n, p or 0) end
+  -- A rule name outside any grammar cannot be resolved.
+  error("rule '"..n.."' used outside a grammar")
+end
+
+
+local exp = m.P{ "Exp", -- grammar that parses 're' syntax and builds the corresponding pattern through captures
+ Exp = S * ( m.V"Grammar" -- an expression is either a whole grammar ...
+ + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) ); -- ... or "/"-separated sequences folded into an ordered choice
+ Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul) -- concatenate the prefixed items, starting from the empty pattern
+ * (#seq_follow + patt_error); -- a sequence must be followed by a legal token, otherwise report a pattern error
+ Prefix = "&" * S * m.V"Prefix" / mt.__len -- and-predicate
+ + "!" * S * m.V"Prefix" / mt.__unm -- not-predicate
+ + m.V"Suffix";
+ Suffix = m.Cf(m.V"Primary" * S * -- fold every suffix onto the primary, left to right
+ ( ( m.P"+" * m.Cc(1, mt.__pow) -- each alternative supplies an operand and the function to apply
+ + m.P"*" * m.Cc(0, mt.__pow)
+ + m.P"?" * m.Cc(-1, mt.__pow)
+ + "^" * ( m.Cg(num * m.Cc(mult)) -- "^n": repetition built by mult
+ + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow)) -- "^+n" / "^-n": handed to __pow as captured text
+ )
+ + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div)) -- "-> 'string'" or "-> number"
+ + m.P"{}" * m.Cc(nil, m.Ct) -- "-> {}": table capture
+ + m.Cg(Def / getdef * m.Cc(mt.__div)) -- "-> name": value looked up in the definitions
+ )
+ + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt)) -- "=> name": match-time capture with a defined function
+ ) * S
+ )^0, function (a,b,f) return f(a,b) end ); -- a: pattern so far, b: operand, f: function to apply
+ Primary = "(" * m.V"Exp" * ")"
+ + String / mm.P -- literal string
+ + Class
+ + defined
+ + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / -- group capture, optionally named
+ function (n, p) return mm.Cg(p, n) end
+ + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end -- "=name": match the same text as the named group capture
+ + m.P"{}" / mm.Cp -- position capture
+ + "{~" * m.V"Exp" * "~}" / mm.Cs -- substitution capture
+ + "{|" * m.V"Exp" * "|}" / mm.Ct -- table capture
+ + "{" * m.V"Exp" * "}" / mm.C -- simple capture
+ + m.P"." * m.Cc(any) -- any character
+ + (name * m.Cb("G") * (S * ":" * S * num)^-1 * -arrow + "<" * name * m.Cb("G") * (S * ":" * S * num)^-1 * ">") / NT; -- rule reference, bare or in <...>, with an optional ":" number forwarded to NT
+ Definition = name * arrow * m.V"Exp";
+ Grammar = m.Cg(m.Cc(true), "G") * -- mark "inside a grammar" for the NT check
+ m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, -- the first definition creates the table, the rest are added to it
+ adddef) / mm.P
+}
+
+local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error) -- top level: not inside a grammar; the whole input must be consumed, otherwise report a pattern error
+
+
+local function compile (p, defs)
+  if mm.type(p) == "pattern" then return p end -- already a pattern object: nothing to do
+  local compiled = pattern:match(p, 1, defs) -- defs reaches the grammar as extra argument 1
+  if compiled then return compiled end
+  error("incorrect pattern", 3) -- level 3: reported two call levels above compile
+end
+
+local function match (s, p, i)
+  local cached = mem[p]
+  if cached then return cached:match(s, i or 1) end
+  -- first use of this pattern: compile it and remember the result
+  local fresh = compile(p)
+  mem[p] = fresh
+  return fresh:match(s, i or 1)
+end
+
+local function streammatch (p, i)
+  local cached = mem[p]
+  if cached then return cached:streammatch(i or 1) end
+  -- first use of this pattern: compile it and remember the result
+  local fresh = compile(p)
+  mem[p] = fresh
+  return fresh:streammatch(i or 1)
+end
+
+-- For testing only: goes through the pattern's emulatestreammatch method
+local function emulatestreammatch(s, p, i)
+  local cached = mem[p]
+  if cached then return cached:emulatestreammatch(s, i or 1) end
+  -- first use of this pattern: compile it and remember the result
+  local fresh = compile(p)
+  mem[p] = fresh
+  return fresh:emulatestreammatch(s, i or 1)
+end
+
+local function find (s, p, i)
+  local searcher = fmem[p]
+  if not searcher then
+    -- "/ 0" discards the pattern's own captures; the grammar retries one character further on each failure
+    local body = compile(p) / 0
+    searcher = mm.P{ mm.Cp() * body * mm.Cp() + 1 * mm.V(1) }
+    fmem[p] = searcher
+  end
+  local first, past = searcher:match(s, i or 1)
+  if not first then return first end
+  return first, past - 1 -- the second position capture is one past the match; report the last matched index
+end
+
+local function gsub (s, p, rep)
+  -- Two-level cache: gmem[p] holds a table of substitution patterns keyed by rep.
+  local byrep = gmem[p] or {} -- the local reference keeps the inner table alive while we work
+  gmem[p] = byrep
+  local subst = byrep[rep]
+  if not subst then
+    subst = mm.Cs((compile(p) / rep + 1)^0) -- replace every match with rep, copy any other character
+    byrep[rep] = subst
+  end
+  return subst:match(s)
+end
+
+
+-- exported names
+local re = {
+ compile = compile, -- turn a pattern string into a pattern object
+ match = match, -- match a string against a (cached) compiled pattern
+ streammatch = streammatch, -- obtain a stream matcher for a pattern
+ emulatestreammatch = emulatestreammatch, -- testing aid
+ find = find, -- search; returns the first and last index of the match
+ gsub = gsub, -- global substitution
+ updatelocale = updatelocale, -- refresh the locale classes and reset the caches
+}
+
+if version == "Lua 5.1" then _G.re = re end -- when _VERSION is "Lua 5.1" the module is also published as the global 're'
+
+return re
diff --git a/tools/luajson/json.lua b/tools/luajson/json.lua
new file mode 100644
index 0000000..d8e0e13
--- /dev/null
+++ b/tools/luajson/json.lua
@@ -0,0 +1,25 @@
+--[[
+ Licensed according to the included 'LICENSE' document
+ Author: Thomas Harning Jr