-- Module:FParser
-- La documentation pour ce module peut être créée à Module:FParser/doc
local lexer = {}
local parser = {}

--[[
Generic parser-combinator toolkit.

The parser is driven by the "parse" function, which builds an initial
"state" object, threads it through the supplied parsing functions and,
if everything goes well, returns the root node of the AST.

* Lexing functions take the parser state as their single parameter and
  return a modified state when they recognize the terminals they are
  responsible for, or nothing (nil) when the parse fails.

* Parsing functions also take a state as their single parameter.  They
  fall into two groups:

  * Generic combinators, written in this module, that help to build a
    parser but do not themselves generate AST nodes, such as
    "alternative", "chain", "star" or "plus":

    * "chain" corresponds to concatenation in a grammar.  A function
      parsing the EBNF rule
          twelve = "1", "2" ;
      is generated by  chain{lex_char("1"), lex_char("2")}.

    * "alternative" corresponds to choice in a grammar.  A function
      parsing the EBNF rule
          digit_excluding_zero = "1" | "2" | ... | "9" ;
      is written
          alternative{lex_char("1"), lex_char("2"), ..., lex_char("9")}.

  * User-written functions, which must generate an AST node and store
    it in the "node" attribute of the state.  To do that they can use
    state attributes filled in by the generic combinators:

    * "lexed": the string consumed by the last lexing function
      (NOTE(review): the original doc called this attribute "parsed",
      but the code only ever sets "lexed"),
    * "acc": the list of nodes generated by the functions iterated by
      the repetition combinators "star" and "plus".

    They return nothing when the parse fails; otherwise they return
    the state produced by the last lexing function they called.

Other helpers:

* maybe: avoids checking for a nil state at every step — when the
  state is in a failed (nil) condition, the wrapped parsing or lexing
  function is not applied.  This makes chaining parsing and lexing
  functions easy.

* idop: takes a function used to perform an action on the current
  state within a chain of parsing functions, mainly mutating closure
  variables.  Functions passed to idop return nothing.  Usable for
  logging, for example.

* statemodop: takes a function that computes an AST node from the
  state and the closure's variables, typically adding it to the state.
  Such functions have the same signature as parsing functions, but
  typically they do not consume any input.
--]]

--------------------------------------------------------------------------------
-- lexer
--------------------------------------------------------------------------------

-- Try to match `regex` (a Lua pattern, anchored at the start of the
-- remaining input).  On success return a fresh state advanced past the
-- match; on failure return nil.
local function lex_regex(state, regex)
  local res = string.match(state.newstate.str, "^" .. regex)
  if res then
    local newstate = {}
    local len = string.len(res)
    newstate.str = state.newstate.str:sub(len + 1)
    newstate.pos = state.newstate.pos + len
    return {
      ["lexed"] = res,
      ["newstate"] = newstate,
      -- Forward the node in case it has not been consumed yet — e.g.
      -- when lexing a closing parenthesis after the node was created.
      -- This avoids having to create a closure variable for it.
      ["node"] = state.node,
    }
  else
    return nil
  end
end
lexer.lex_regex = lex_regex

-- Matches the empty string: always succeeds, consumes nothing.
--[[ Tests:
p.parse("a", p.chain{p.lexer.lex_epsilon, p.lex_char("a"), p.lexer.lex_epsilon})
--]]
local function lex_epsilon(state)
  return lex_regex(state, "")
end
lexer.lex_epsilon = lex_epsilon

-- Returns a lexer matching `char`.  NOTE(review): `char` is used
-- verbatim as a Lua pattern, so magic characters (".", "+", "(", ...)
-- must be escaped by the caller (e.g. lex_char("%+")).
-- Tested with: p.parse("/", p.lex_char('/'))
local lex_char = function(char)
  return function(state)
    return lex_regex(state, char)
  end
end
lexer.lex_char = lex_char

function lexer.open_parenthesis(state)
  return lex_regex(state, "[(]")
end

function lexer.close_parenthesis(state)
  return lex_regex(state, "[)]")
end

function lexer.lex_integer(state)
  return lex_regex(state, "[0-9]+")
end

function lexer.eat_blanks(state)
  return lex_regex(state, ' *')
end

parser.lexer = lexer

-------------------------------------------------------
-- generic parser combinators
-------------------------------------------------------

-- Propagate failure: when the state is nil, skip `func` and yield nil.
local function maybe(func)
  return function(state)
    if state then
      return func(state)
    end
  end
end
parser.maybe = maybe

-- Build a node from the accumulated child nodes (state.acc) with
-- `aggregate_func` and store it as the node of the resulting state.
local function accumulate(aggregate_func)
  return function(state)
    -- FIX: propagate a failed (nil) state like the other combinators
    -- instead of raising "attempt to index a nil value".
    if state then
      return {
        ["newstate"] = state.newstate,
        ["node"] = aggregate_func(state.acc),
      }
    end
  end
end
parser.accumulate = accumulate

-- Apply `map_func` to the current node, keeping the rest of the state.
local function map_node(map_func)
  return function(state)
    if state then
      return {
        ["newstate"] = state.newstate,
        ["node"] = map_func(state.node),
      }
    end
  end
end
parser.map_node = map_node

-- Run `func` for its side effects only and pass the state through
-- unchanged; a nil state is propagated.
local function idop(func)
  return function(state)
    if state then
      func(state)
      return state
    end
  end
end
parser.idop = idop

-- Wrap a state-transforming callback (typically one that builds an AST
-- node from the state) so that a failed (nil) state is propagated.
-- FIX: this function was described in the module header and used by
-- the in-file test for `plus` (p.statemodop), but was never defined.
local function statemodop(func)
  return function(state)
    if state then
      return func(state)
    end
  end
end
parser.statemodop = statemodop

-- Logging helpers.  (Local renamed from `string` to avoid shadowing
-- the string library.)
local show = function(msg)
  return idop(function(state) mw.log(msg) end)
end
parser.log = show
parser.show = show

local dump_state = idop(function(state) mw.logObject(state) end)
parser.dump_state = dump_state

-- Concatenation: apply each function of `funcs` in order, feeding each
-- one the state produced by the previous one; fail (return nil) as
-- soon as one fails.  This avoids passing the state between the
-- functions by hand.
-- FIX: a nil entry is now reported for funcs[1] too (previously only
-- checked from the second entry on), and an empty chain acts as the
-- identity instead of crashing.
--[[ Tests:
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b'), p.lex_char('a')}) => yes
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b')}) => nope
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b'), p.lex_char('a'), p.lex_char('a')}) => nope
--]]
local function chain(funcs)
  return function(state)
    local res = state
    for i = 1, #funcs do
      if funcs[i] == nil then
        error("a nil func in chain")
      end
      res = funcs[i](res)
      if not res then
        return nil
      end
    end
    return res
  end
end
parser.chain = chain

-- Choice: try each function of `funcs` on the same input state and
-- return the result of the first one that matches; nil when all fail.
--[[ Tests:
p.parse("a", p.alternative{p.lex_char('a'), p.lex_char('b')}) => yes
p.parse("b", p.alternative{p.lex_char('a'), p.lex_char('b')}) => yes
p.parse("c", p.alternative{p.lex_char('a'), p.lex_char('b')}) => nope
--]]
local function alternative(funcs)
  return function(state)
    for i = 1, #funcs do
      local res = funcs[i](state)
      if res then
        return res
      end
    end
  end
end
parser.alternative = alternative

-- Repetition, zero or more: apply `parsing_func` as long as it
-- succeeds, collecting the node produced by each iteration into the
-- "acc" list of the returned state (the last successful state, or the
-- input state itself when there was no match at all).
--[[ Tests:
p.parse("aaab", p.chain{p.star(p.lex_char("a")), p.lex_char("b")}) => yes
p.parse("b", p.chain{p.star(p.lex_char("a")), p.lex_char("b")}) => yes
--]]
local function star(parsing_func)
  return function(state)
    if state then
      local acc = {}
      local current = state
      while true do
        local res = parsing_func(current)
        if not res then
          break
        end
        table.insert(acc, res.node)
        current = res
      end
      current.acc = acc
      return current
    end
  end
end
parser.star = star

-- Repetition, one or more: like star but fails when `parsing_func`
-- does not match at least once.  The node of the first match is
-- prepended to the "acc" list produced by the subsequent star.
--[[ Test:
res = p.parse("aaab",
  p.chain{
    p.plus(p.chain{p.lex_char("a"),
                   p.statemodop(function(res) res.node = "a"; return res; end)}),
    p.lex_char("b")})
--]]
local function plus(parsing_func)
  return function(state)
    local firstnode
    return chain{
      parsing_func,
      idop(function(st) firstnode = st.node end),
      star(parsing_func),
      function(st)
        table.insert(st.acc, 1, firstnode)
        return st
      end,
    }(state)
  end
end
parser.plus = plus

--[[ Tests (legacy, kept verbatim):
p.parse("a", p.chain{p.lex_char('a'), p.idop(function (state) end)}) => yes
p.parse("ab", p.chain{p.lex_char('a'), p.idop(function (state) end), p.lex_char('b')}) => nope
p.parse("ab", p.chain{p.lex_char('a'), p.idop(function (state) end), p.lex_char('a')}) => nope
--]]

-- Option, zero or one: try `parsing_func`; when it fails, match the
-- empty string instead.  The resulting node is the one produced by
-- `parsing_func`, or nil when the optional part was absent (this
-- deliberately overwrites any node forwarded by lex_epsilon).
local function questionmark(parsing_func)
  return function(state)
    -- FIX: propagate a failed (nil) state like the other combinators.
    if not state then
      return nil
    end
    local node = nil
    local res = alternative{
      chain{
        parsing_func,
        idop(function(st) node = st.node end),
      },
      lex_epsilon, -- always succeeds, so res is never nil below
    }(state)
    res.node = node
    return res
  end
end
parser.questionmark = questionmark

--------------------------------------------------------------------------------
-- main function that launches the parsing
--------------------------------------------------------------------------------

-- Parse `str` with `base_parsing_function`.  Trailing blanks are
-- consumed; the parse succeeds only when the whole input has been
-- consumed, in which case the root AST node is returned, nil otherwise.
-- (Parameter renamed from `string` to avoid shadowing the string
-- library; the call signature is unchanged.)
parser.parse = function(str, base_parsing_function)
  local state = {}
  state.newstate = {}
  state.newstate.str = str
  state.newstate.pos = 1
  local res_node = nil
  local res = chain{
    base_parsing_function,
    idop(function(st) res_node = st.node end),
    lexer.eat_blanks,
  }(state)
  if res and res.newstate.str == "" then
    return res_node
  end
  return nil
end

-- Build a parser for an n-ary infix operator.  It can parse an
-- expression of the form "1 + 2 + 3 + 4" and return a single "sum"
-- AST node holding the list of operand nodes, e.g.
-- sum([number(1), number(2), number(3), number(4)]).
--
-- Parameters:
--   first_node_parser       parses only the first operand ("1");
--   next_nodes_parser       parses one infix operator and the
--                           following operand ("+ 2" and subsequent);
--   acc_result_node_creator builds the final node from the list of
--                           operand nodes generated above;
--   single_creator          optional — corner case of an expression
--                           with no actual operator, just one element
--                           ("1").  By default the first operand's
--                           node is returned as-is (one usually wants
--                           number(1) rather than sum([number(1)]));
--                           provide a function taking that node to
--                           build something else.
local function nary_op_parser(first_node_parser,
                              next_nodes_parser,
                              acc_result_node_creator,
                              single_creator)
  return function(state)
    local firstNode
    local res = chain{
      first_node_parser,
      idop(function(st) firstNode = st.node end),
      star(next_nodes_parser),
    }(state)
    if res then
      if res.acc and #(res.acc) > 0 then
        local acc = res.acc
        table.insert(acc, 1, firstNode)
        res.node = acc_result_node_creator(acc)
      else
        single_creator = single_creator or function(node) return node end
        res.node = single_creator(firstNode)
      end
      res.acc = nil
      return res
    end
  end
end
parser.nary_op_parser = nary_op_parser

-- Convenience alias: the in-file tests refer to p.lex_char directly,
-- but it was previously reachable only as parser.lexer.lex_char.
parser.lex_char = lex_char

return parser