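Lua Balanced

LuaBalanced is a Lua module that extracts delimited Lua sequences (strings, comments, bracketed groups and expressions) from strings.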
It is inspired by Damian Conway's Text::Balanced[1] in Perl.
This code is used by ListComprehensions.
The unique feature of this implementation is that it does not rigorously lex and parse the Lua grammar. It doesn't need to. It assumes during the parse that the Lua code is syntactically correct (which can be verified later using loadstring). By assuming this, extraction of delimited sequences is significantly simplified yet can still be robust, and it also supports supersets of the Lua grammar. The code, which is written entirely in Lua, is just under 200 lines of Lua code (compare to Yueliang used in MetaLua, where the lexer alone is a few hundred lines).
-- Usage example for luabalanced.lua.
-- Each "-->" / "--[[output:" comment shows what the preceding call prints.

local lb = require "luabalanced"

-- Extract Lua expression starting at position 4.
print(lb.match_expression("if x^2 + x > 5 then print(x) end", 4))
--> x^2 + x > 5 16

-- Extract Lua string starting at (default) position 1.
print(lb.match_string([["test\"123" .. "more"]]))
--> "test\"123" 12

-- Break Lua code into code types.
-- The callback receives the snippet type (e = other code, c = comment,
-- s = string) and the snippet text; it returns nothing here, so gsub keeps
-- every snippet unchanged and the call is used only for its printing.
lb.gsub([[
 local x = 1 -- test
 print("x=", x)
]], function(u, s) print(u .. '[' .. s .. ']') end)
--[[output:
e[ local x = 1 ]
c[-- test
]
e[ print(]
s["x="]
e[, x)
]
]]
-- luabalanced_test.lua
-- test for luabalanced.lua
--
-- Run from the directory containing this file: the robustness check below
-- re-reads 'luabalanced_test.lua' by relative path.

local lb = require "luabalanced"
local tuple = require "tuple"

-- unpack moved to table.unpack in Lua 5.2+; support both.
local unpack = table.unpack or unpack

-- utility function for test suite.
-- Raises an error (reported at the caller's line) unless a == b.
local function asserteq(a, b)
  if a ~= b then
    error(tostring(a) .. ' == ' .. tostring(b) .. ' failed', 2)
  end
end

-- utility function (wrap function: store return in tuple and protect)
-- The wrapped function returns the string 'error' if f raised, otherwise
-- a tuple holding all of f's return values.
local function wrap2(f)
  return function(s, pos)
    local res = tuple(pcall(function() return f(s, pos) end))
    if not res[1] then
      return 'error'
    else
      return tuple(unpack(res, 2, res.n))
    end
  end
end

--## match_bracketed tests

-- test wrapper for lb.match_bracketed
local mb = wrap2(lb.match_bracketed)

-- trivial tests
asserteq(mb'', tuple(nil, 1))
asserteq(mb'a', tuple(nil, 1))
asserteq(mb'{', 'error')
asserteq(mb'}', tuple(nil, 1))
asserteq(mb'{[}]', 'error')
asserteq(mb('[{}]'), tuple('[{}]', 5))

-- test with pos
asserteq(mb('[][a(a)a].', 3), tuple('[a(a)a]', 10))

-- test with strings
asserteq(mb('[ "]" ]'), tuple('[ "]" ]', 8))
asserteq(mb("[ '[' ]"), tuple("[ '[' ]", 8))
asserteq(mb("[ [=[ ]=] ]"), tuple("[ [=[ ]=] ]", 12))
asserteq(mb("[[ ] ]]"), tuple("[[ ] ]]", 8))
asserteq(mb("[=[ [ ]=]"), tuple("[=[ [ ]=]", 10))

--## match_expression tests

-- test wrapper for lb.match_expression
local me = wrap2(lb.match_expression)

asserteq(me'a', tuple('a', 2))
asserteq(me'a b=c', tuple('a ', 3))
asserteq(me'a and b', tuple('a and b', 8))
asserteq(me'a and b ', tuple('a and b ', 9))
asserteq(me'a and b c', tuple('a and b ', 9))
asserteq(me'a+b', tuple('a+b', 4))
asserteq(me'a+b b=c', tuple('a+b ', 5))
asserteq(me'{function()end}+b c', tuple('{function()end}+b ', 19))
asserteq(me'{} e', tuple('{} ', 4))
asserteq(me'() e', tuple('() ', 4))
asserteq(me'"" e', tuple('"" ', 4))
asserteq(me"'' e", tuple("'' ", 4))
asserteq(me'a[1] e', tuple('a[1] ', 6))
asserteq(me'ab.cd e', tuple('ab.cd ', 7))
asserteq(me'ab:cd() e', tuple('ab:cd() ', 9))
asserteq(me'(x) (y) z', tuple('(x) (y) ', 9))
asserteq(me'x >= y', tuple('x >= y', 7))

-- numbers
asserteq(me'1e2 a', tuple('1e2 ', 5))
asserteq(me'1e+2 a', tuple('1e+2 ', 6))
asserteq(me'1.2e+2 a', tuple('1.2e+2 ', 8))
asserteq(me'.2e+2 a', tuple('.2e+2 ', 7))

-- comments
asserteq(me'a+ -- b\nc', tuple('a+ -- b\nc', 10))
asserteq(me'a --[[]] b', tuple('a --[[]] ', 10))
asserteq(me'a+ --[[]] b', tuple('a+ --[[]] b', 12))
asserteq(me'a --[[]] + b', tuple('a --[[]] + b', 13))
asserteq(me'a+ --[[]] --[=[]=] b', tuple('a+ --[[]] --[=[]=] b', 21))
asserteq(me'a+ -- b\n -- b\n b c', tuple('a+ -- b\n -- b\n b ', 18))

-- check for exceptions giving lots of possibly not syntactically
-- correct data.
-- pcall is used directly (rather than the wrap2 wrapper, which discards
-- the error message) so that any error other than a reported
-- 'syntax error' is re-raised and fails the suite.
local fh = assert(io.open('luabalanced_test.lua'))
local text = fh:read('*a')
fh:close()
for i=1,#text do
  local ok, err = pcall(lb.match_expression, text, i)
  if not ok and not tostring(err):match('syntax error') then
    error(err)
  end
end

--## match_explist tests

-- test wrapper for lb.match_explist: joins the returned list with '|'.
local ml = function(...)
  local res = wrap2(lb.match_explist)(...)
  res[1] = table.concat(res[1], '|')
  return res
end

asserteq(ml ' d', tuple(' ', 2))
asserteq(ml 'a+b,b*c d', tuple('a+b|b*c ', 9))

--## match_namelist tests

-- test wrapper for lb.match_namelist: joins the returned list with '|'.
local mn = function(...)
  local res = wrap2(lb.match_namelist)(...)
  res[1] = table.concat(res[1], '|')
  return res
end

asserteq(mn ' ', tuple('', 1))
asserteq(mn 'a b', tuple('a', 3))
asserteq(mn 'a,b d', tuple('a|b', 5))
asserteq(mn 'a,b+d', tuple('a|b', 4))

--## gsub tests

local ls = lb.gsub

-- replacement function: tags each snippet with its type.
local function f(u, s)
  return '[' .. u .. ':' .. s .. ']'
end

asserteq(ls('', f), '')
asserteq(ls(' ', f), '[e: ]')
asserteq(ls(' "z" ;', f), '[e: ][s:"z"][e: ;]')
asserteq(ls(' --[[z]] ;', f), '[e: ][c:--[[z]]][e: ;]')
asserteq(ls(' --z\n ;', f), '[e: ][c:--z\n][e: ;]')
asserteq(ls(' --z', f), '[e: ][c:--z]')
asserteq(ls('[][=[ ] ]=] ;', f), '[e:[]][s:[=[ ] ]=]][e: ;]')
asserteq(ls('a - b --[[d]] .. "--"', f), '[e:a - b ][c:--[[d]]][e: .. ][s:"--"]')

print 'DONE'
-- luabalanced.lua
-- Extract delimited Lua sequences from strings.[1]
-- Inspired by Damian Conway's Text::Balanced[2] in Perl.
--
-- [1] http://lua-users.org/wiki/LuaBalanced
-- [2] http://search.cpan.org/dist/Text-Balanced/lib/Text/Balanced.pm
--
-- (c) 2008, David Manura, Licensed under the same terms as Lua (MIT license).
--
-- All match_* functions take a subject string <s> and an optional start
-- position <pos> (default 1) and raise an error whose message contains
-- 'syntax error' when the input cannot be matched.

local M = {}

local assert = assert
local table_concat = table.concat

-- map opening brace <-> closing brace.
local ends = { ['('] = ')', ['{'] = '}', ['['] = ']' }
local begins = {}; for k,v in pairs(ends) do begins[v] = k end


-- Match Lua string in string <s> starting at position <pos>.
-- Returns <string>, <posnew>, where <string> is the matched
-- string (or nil on no match) and <posnew> is the character
-- following the match (or <pos> on no match).
-- Supports all Lua string syntax: "...", '...', [[...]], [=[...]=], etc.
-- Raises 'syntax error' if the string is opened but never closed.
local function match_string(s, pos)
  pos = pos or 1
  local posa = pos
  local c = s:sub(pos,pos)
  if c == '"' or c == "'" then
    pos = pos + 1
    while true do
      -- jump to the next closing quote or backslash, whichever is first.
      pos = assert(s:find("[" .. c .. "\\]", pos), 'syntax error')
      if s:sub(pos,pos) == c then
        local part = s:sub(posa, pos)
        return part, pos + 1
      else
        pos = pos + 2  -- backslash: skip it and the escaped character
      end
    end
  else
    -- long bracket: <sc> is the run of '=' between the two '['.
    local sc = s:match("^%[(=*)%[", pos)
    if sc then
      local _; _, pos = s:find("%]" .. sc .. "%]", pos)
      assert(pos, 'syntax error: long string not terminated')
      local part = s:sub(posa, pos)
      return part, pos + 1
    else
      return nil, pos
    end
  end
end
M.match_string = match_string


-- Match Lua comment, e.g. "--...\n", "--[[...]]", "--[=[...]=]", etc.
-- Function interface is similar to match_string.
-- A line comment includes its terminating newline, if there is one.
local function match_comment(s, pos)
  pos = pos or 1
  if s:sub(pos, pos+1) ~= '--' then
    return nil, pos
  end
  pos = pos + 2
  -- Only a long bracket directly after "--" makes a block comment.
  -- Quoted text (e.g. --"x" y) is an ordinary line comment, so the
  -- quote forms accepted by match_string must not be tried here.
  if s:find('^%[=*%[', pos) then
    local partt, post = match_string(s, pos)
    return '--' .. partt, post
  end
  local part; part, pos = s:match('^([^\n]*\n?)()', pos)
  return '--' .. part, pos
end
M.match_comment = match_comment


-- Match bracketed Lua expression, e.g. "(...)", "{...}", "[...]", "[[...]]",
-- [=[...]=], etc.
-- Function interface is similar to match_string.
-- Strings and comments inside the brackets are skipped, so brackets and
-- quotes they contain do not affect balancing.
-- Raises 'syntax error' on unbalanced or unterminated input.
local function match_bracketed(s, pos)
  pos = pos or 1
  local posa = pos
  local ca = s:sub(pos,pos)
  if not ends[ca] then
    return nil, pos
  end
  local stack = {}  -- currently open brackets, innermost last
  while true do
    pos = s:find('[%(%{%[%)%}%]\"\'%-]', pos)
    assert(pos, 'syntax error: unbalanced')
    local c = s:sub(pos,pos)
    if c == '"' or c == "'" then
      local part; part, pos = match_string(s, pos)
      assert(part, 'syntax error')
    elseif c == '-' then
      if s:sub(pos+1,pos+1) == '-' then  -- comment: skip it entirely
        local part; part, pos = match_comment(s, pos)
        assert(part, 'syntax error')
      else  -- plain minus sign
        pos = pos + 1
      end
    elseif ends[c] then -- open
      local mid, posb
      if c == '[' then mid, posb = s:match('^%[(=*)%[()', pos) end
      if mid then
        -- long string: skip to just past its closing bracket.
        pos = s:match('%]' .. mid .. '%]()', posb)
        assert(pos, 'syntax error: long string not terminated')
        if #stack == 0 then
          -- the long string itself was the bracketed item.
          local part = s:sub(posa, pos-1)
          return part, pos
        end
      else
        stack[#stack+1] = c
        pos = pos + 1
      end
    else -- close
      assert(stack[#stack] == assert(begins[c]), 'syntax error: unbalanced')
      stack[#stack] = nil
      if #stack == 0 then
        local part = s:sub(posa, pos)
        return part, pos+1
      end
      pos = pos + 1
    end
  end
end
M.match_bracketed = match_bracketed


-- Match Lua expression, e.g. "a + b * c[e]".
-- Function interface is similar to match_string.
-- The match always succeeds (possibly with an empty string); it ends
-- before an unmatched closing bracket, a ';', a ',', an assignment '=',
-- or an identifier that cannot continue the expression, and otherwise
-- runs to the end of <s>.
local wordop = {['and']=true, ['or']=true, ['not']=true}
local is_compare = {['>']=true, ['<']=true, ['~']=true}
local function match_expression(s, pos)
  pos = pos or 1
  local posa = pos
  local lastident      -- most recent identifier/keyword/number token
  local poscs, posce   -- start and end of the most recent comment run
  while pos do
    local c = s:sub(pos,pos)
    if c == '"' or c == "'" or c == '[' and s:find('^[=%[]', pos+1) then
      local part; part, pos = match_string(s, pos)
      assert(part, 'syntax error')
    elseif c == '-' and s:sub(pos+1,pos+1) == '-' then
      -- note: handle adjacent comments in loop to properly support
      -- backtracing (poscs/posce).
      poscs = pos
      while s:sub(pos,pos+1) == '--' do
        local part; part, pos = match_comment(s, pos)
        assert(part)
        pos = s:match('^%s*()', pos)
        posce = pos
      end
    elseif c == '(' or c == '{' or c == '[' then
      local part; part, pos = match_bracketed(s, pos)
    elseif c == '=' and s:sub(pos+1,pos+1) == '=' then
      pos = pos + 2  -- skip over two-char op containing '='
    elseif c == '=' and is_compare[s:sub(pos-1,pos-1)] then
      pos = pos + 1  -- skip over two-char op containing '='
    elseif c:match'^[%)%}%];,=]' then
      local part = s:sub(posa, pos-1)
      return part, pos
    elseif c:match'^[%w_]' then
      local newident,newpos = s:match('^([%w_]+)()', pos)
      if pos ~= posa and not wordop[newident] then -- non-first ident
        -- Look at the last non-space character before this identifier
        -- (before the preceding comment run, if one ends right here).
        -- NOTE(review): this backward scan is not bounded below by
        -- <posa>; confirm the intended result for leading whitespace.
        local pose = ((posce == pos) and poscs or pos) - 1
        while s:match('^%s', pose) do pose = pose - 1 end
        local ce = s:sub(pose,pose)
        -- An identifier directly after a completed operand (closing
        -- bracket, string, or non-operator word) starts something new.
        if ce:match'[%)%}\'\"%]]' or
           ce:match'[%w_]' and not wordop[lastident]
        then
          local part = s:sub(posa, pos-1)
          return part, pos
        end
      end
      lastident, pos = newident, newpos
    else
      pos = pos + 1
    end
    pos = s:find('[%(%{%[%)%}%]\"\';,=%w_%-]', pos)
  end
  local part = s:sub(posa, #s)
  return part, #s+1
end
M.match_expression = match_expression


-- Match name list (zero or more names).  E.g. "a,b,c"
-- Function interface is similar to match_string,
-- but returns array as match.
local function match_namelist(s, pos)
  pos = pos or 1
  local list = {}
  while true do
    -- every name after the first must be preceded by a comma.
    local c = #list == 0 and '^' or '^%s*,%s*'
    local item, post = s:match(c .. '([%a_][%w_]*)%s*()', pos)
    if item then pos = post else break end
    list[#list+1] = item
  end
  return list, pos
end
M.match_namelist = match_namelist


-- Match expression list.  E.g. "a+b,b*c".
-- Function interface is similar to match_string,
-- but returns array as match.
-- The array always holds at least one entry, because the first
-- expression is matched unconditionally (it may be an empty string).
local function match_explist(s, pos)
  pos = pos or 1
  local list = {}
  while true do
    if #list ~= 0 then
      local post = s:match('^%s*,%s*()', pos)
      if post then pos = post else break end
    end
    local item; item, pos = match_expression(s, pos)
    assert(item, 'syntax error')
    list[#list+1] = item
  end
  return list, pos
end
M.match_explist = match_explist


-- Replace snippets of code in Lua code string <s>
-- using replacement function f(u,sin) --> sout.
-- <u> is the type of snippet ('c' = comment, 's' = string,
-- 'e' = any other code).
-- Snippet is replaced with <sout> (unless <sout> is nil or false, in
-- which case the original snippet is kept)
-- This is somewhat analogous to string.gsub .
-- Returns the rebuilt string.
local function gsub(s, f)
  local pos = 1
  local posa = 1   -- start of the pending 'e' (other code) snippet
  local out = {}   -- output pieces, joined once at the end

  -- Run the callback on one snippet and append the result.
  local function emit(u, snippet)
    out[#out+1] = f(u, snippet) or snippet
  end

  while true do
    pos = s:find('[%-\'\"%[]', pos)
    if not pos then break end
    if s:match('^%-%-', pos) then
      local exp = s:sub(posa, pos-1)
      if #exp > 0 then emit('e', exp) end
      local comment; comment, pos = match_comment(s, pos)
      emit('c', assert(comment))
      posa = pos
    else
      local posb = s:find('^[\'\"%[]', pos)
      local str
      if posb then str, pos = match_string(s, posb) end
      if str then
        local exp = s:sub(posa, posb-1)
        if #exp > 0 then emit('e', exp) end
        emit('s', str)
        posa = pos
      else
        -- lone '-' or a '[' that does not open a long string.
        pos = pos + 1
      end
    end
  end
  local exp = s:sub(posa)
  if #exp > 0 then emit('e', exp) end
  return table_concat(out)
end
M.gsub = gsub

return M
The following file is used by the test suite:
-- tuple.lua
-- Simple tuple implementation using tables.
-- (c) 2008, David Manura, Licensed under the same terms as Lua (MIT license).
--
-- The module returns a constructor function.  A tuple stores its values
-- at integer keys 1..n and its length in field <n>, so nil values
-- (including trailing ones) are preserved.

local select = select
local tostring = tostring
local type = type
local setmetatable = setmetatable
local string_format = string.format
local table_concat = table.concat

-- metatable shared by all tuples.
local mt = {}

-- Create a tuple holding all arguments (nils included).
local function tuple(...)
  local t = setmetatable({n=select('#',...), ...}, mt)
  return t
end

-- Render as "tuple(v1,v2,...)"; string elements are quoted with %q.
function mt:__tostring()
  local ts = {}
  for i=1,self.n do
    local v = self[i]
    ts[#ts+1] = type(v) == 'string' and string_format('%q', v) or tostring(v)
  end
  return 'tuple(' .. table_concat(ts, ',') .. ')'
end

-- Two tuples are equal when they have the same length and their
-- elements are pairwise equal (using ==).
function mt.__eq(a, b)
  if a.n ~= b.n then return false end
  for i=1,a.n do
    if a[i] ~= b[i] then return false end
  end
  return true
end

return tuple
This module is new and likely still has some bugs.