I use Lark to parse lines of a log file. The log file contains some equations, and I would like to extract the left-hand side and right-hand side of each equation and store them in a dictionary. However, in my context the parsing is rather slow. I sped it up by switching from the Earley algorithm to LALR(1) parsing, as suggested by the Lark tutorial. Now I would like to squeeze out the last bit of performance by also "going tree-less", i.e. passing the transformer directly to the parser so that no intermediate parse tree is built. Unfortunately, it does not work as expected. Consider the following MWE:
from lark import Lark, Transformer
# Toggle between the two ways of applying ToDict in this example:
# True hands it to the Lark constructor via `transformer=`,
# False transforms the finished parse tree afterwards.
OPT = True
def parser() -> Lark:
    """Build the LALR(1) Lark parser for a comma-separated list of equations.

    The grammar accepts ``identifier = rhs`` pairs, where ``rhs`` is either a
    signed number or a bracketed, comma-separated vector of signed numbers.
    Whitespace is ignored.

    When the module-level ``OPT`` flag is true, ``ToDict`` is additionally
    passed through Lark's ``transformer`` option; otherwise a plain parser
    is returned.
    """
    equation_grammar = r"""
equations: [equation ("," equation)*]
equation: identifier "=" rhs
identifier: CNAME
rhs: num
| vector
vector: "[" [num ("," num)*] "]"
num: SIGNED_NUMBER
%import common.CNAME
%import common.SIGNED_NUMBER
%import common.WS
%ignore WS
"""
    # Options shared by both variants of the parser.
    lark_options = {"start": "equations", "parser": "lalr"}
    if OPT:
        # NOTE: the ToDict class object itself is passed here, not an
        # instance. This is kept exactly as in the original MWE; it is what
        # triggers the TypeError shown below the code.
        lark_options["transformer"] = ToDict
    return Lark(equation_grammar, **lark_options)
class ToDict(Transformer):
    """Turn a parsed ``equations`` tree into a plain ``dict``.

    Each method is named after a grammar rule and receives the list of
    already-transformed children of that rule.
    """

    def equations(self, eqs):
        """Merge the one-entry dicts produced by ``equation`` into one dict."""
        merged = {}
        for single in eqs:
            for key, value in single.items():
                merged[key] = value
        return merged

    def equation(self, eq):
        """Map the identifier to its right-hand side in a one-entry dict."""
        key, value = eq
        return {key: value}

    def rhs(self, num_vec):
        """Unwrap the single child (a number or a vector)."""
        [only_child] = num_vec
        return only_child

    def identifier(self, ident):
        """Return the identifier's single child as a ``str``."""
        [name] = ident
        return str(name)

    def num(self, n):
        """Return the number's single child as a ``float``."""
        [literal] = n
        return float(literal)

    def vector(self, vec):
        """Return the vector's children as a new list."""
        return [*vec]
if __name__ == "__main__":
    # Sample input: one scalar equation and one vector equation.
    line = "a=3.14, b=[1.41, 1.732]"
    prsr = parser()
    parsed = prsr.parse(line)
    if not OPT:
        # Without the transformer option, convert the parse result explicitly.
        parsed = ToDict().transform(parsed)
    print(parsed)
If `OPT` is set to `False`, the expression `{'a': 3.14, 'b': [1.41, 1.732]}` is returned as expected. But if `OPT` is set to `True`, this happens:
Traceback (most recent call last):
File ".../mwe.py", line 55, in <module>
parsed = prsr.parse(line)
File ".../anaconda3/envs/lark/lib/python3.10/site-packages/lark/lark.py", line 625, in parse
return self.parser.parse(text, start=start, on_error=on_error)
File ".../anaconda3/envs/lark/lib/python3.10/site-packages/lark/parser_frontends.py", line 96, in parse
return self.parser.parse(stream, chosen_start, **kw)
File ".../anaconda3/envs/lark/lib/python3.10/site-packages/lark/parsers/lalr_parser.py", line 41, in parse
return self.parser.parse(lexer, start)
File ".../anaconda3/envs/lark/lib/python3.10/site-packages/lark/parsers/lalr_parser.py", line 171, in parse
return self.parse_from_state(parser_state)
File ".../anaconda3/envs/lark/lib/python3.10/site-packages/lark/parsers/lalr_parser.py", line 179, in parse_from_state
state.feed_token(token)
File ".../anaconda3/envs/lark/lib/python3.10/site-packages/lark/parsers/lalr_parser.py", line 150, in feed_token
value = callbacks[rule](s)
TypeError: ToDict.identifier() missing 1 required positional argument: 'ident'
What is the error message trying to tell me?
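You need to pass in an instance of the Transformer class, not the class itself. When the class is passed, `ToDict.identifier` is looked up as a plain function rather than a bound method, so the single argument Lark supplies (the list of children) is taken as `self` and `ident` is left unfilled, which is exactly what the `TypeError` reports. Use: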
transformer=ToDict()
instead of
transformer=ToDict