I'm trying to parse command-like string/file content into a dict, and I came across pyparsing.
So let's say I have the following input:
# Sample command string fed to the parser.
# NOTE(review): the name `str` shadows the builtin; it is kept because the parsing script reads the input by this name.
str = "p1 start a, {alias = b, for : 30}; c, d stop e"
and to parse it, I'm using this:
import pyparsing as pp

# Punctuation: braces and the assignment sign never appear in the output.
LBRACE = pp.Suppress("{")
RBRACE = pp.Suppress("}")
EQ = pp.Suppress(pp.one_of(': ='))
SEP = pp.one_of(", ;")

# Command verbs, matched case-insensitively as whole keywords.
START = pp.CaselessKeyword("start")
STOP = pp.CaselessKeyword("stop")
RESUME = pp.CaselessKeyword("resume")
CMD_KEYWORD = START | STOP | RESUME

platform = pp.one_of("p1 p2 p3")("platform")

# Aliases, property names and property values are all plain alphanumeric words.
alias = pp.Word(pp.alphanums)
prop = pp.Word(pp.alphanums)
value = pp.Word(pp.alphanums)

# A "name = value" / "name : value" pair, exposed as a dict entry.
key_value_pair = pp.Group(prop + EQ + value)
prop_value = pp.Dict(key_value_pair)
task_config = LBRACE + pp.delimitedList(prop_value, delim=SEP) + RBRACE

# A command is a verb followed by a separated list of aliases and/or
# brace-enclosed task configurations.
task_list = pp.delimitedList(task_config | alias, delim=SEP)
command = CMD_KEYWORD + pp.Group(task_list)("tasks")
expr = platform + pp.OneOrMore(command)("commands")

grammar = pp.Forward()
grammar <<= expr

# `str` is the sample input string defined before this script.
res = grammar.parse_string(str)
print(res.as_dict())
print(res.as_list())
which results in the following Dict and List
{'platform': 'p1', 'tasks': ['e'], 'commands': ['start', {'alias': 'b', 'for': '30'}, 'stop', ['e']]}
['p1', 'start', ['a', ['alias', 'b'], ['for', '30'], 'c', 'd'], 'stop', ['e']]
What I was (and still am) trying to achieve is output in a specific dict format, something like this:
{
[
{
'platform': 'p1',
'commands': [
{'cmd': 'start', 'tasks': [{'alias': 'a'}, {'alias': 'b', 'for': '30'}, {'alias': 'c'}, {'alias': 'd'}]},
{'cmd': 'stop', 'tasks': [{'alias': 'e'}]}
],
}
]
}
Edit:
After some trial and error, I managed to (almost) achieve what I'm aiming for by making some changes to my parser grammar:
import pyparsing as pp


def set_alias(t):
    """Parse action: wrap a bare alias token as ``{"alias": <token>}``."""
    return {"alias": t[0]}


# Input used to exercise the grammar: two platform sections back to back.
text = "p1 start a, {alias = b, for : 30}; c, d stop e p2 resume f"

grammar = pp.Forward()
SEP = pp.one_of(", ;")
EQ = pp.Suppress(pp.one_of(': ='))
LBRACE, RBRACE = map(pp.Suppress, "{}")
# Separators between tasks/sections are optional and dropped from the output.
OPT_SEP = pp.Suppress(pp.Opt(SEP))
CMD_KEYWORD = (
    pp.CaselessKeyword("start")
    | pp.CaselessKeyword("stop")
    | pp.CaselessKeyword("resume")
)("cmd")
platform = pp.one_of("p1 p2 p3")("platform")
# The negative lookahead keeps command verbs and platform names from being
# consumed as task aliases, so the task list ends where the next one starts.
alias = ~(CMD_KEYWORD | platform) + pp.Word(pp.alphanums)
prop = pp.Word(pp.alphanums)
value = pp.Word(pp.alphanums)
prop_value = pp.Dict(pp.Group(prop + EQ + value))
task_config = LBRACE + pp.Group(pp.delimitedList(prop_value, delim=SEP)) + RBRACE
task = task_config | alias.set_parse_action(set_alias)
command = pp.Group(CMD_KEYWORD + pp.Group(pp.OneOrMore(task + OPT_SEP))("tasks"))
expr = platform + command[1, ...]("commands")
grammar <<= pp.OneOrMore(expr + OPT_SEP)

# Run the parser; the snippet previously printed `res` without ever assigning it.
res = grammar.parse_string(text)
print(res.as_dict())
print(res.as_list())
But when I'm testing it with the following input:
p1 start a, {alias = b, for : 30}; c, d stop e p2 resume f
I get:
{'platform': 'p2', 'commands': [{'cmd': 'resume', 'tasks': [{'alias': 'f'}]}]}
['p1', ['start', [{'alias': 'a'}, [['alias', 'b'], ['for', '30']], {'alias': 'c'}, {'alias': 'd'}]], ['stop', [{'alias': 'e'}]], 'p2', ['resume', [{'alias': 'f'}]]]
As you can see, res.as_list() returns all the expected tokens, but res.as_dict() only returns the 'platform': 'p2' part and is missing the 'platform': 'p1' one, and I can't figure out why.
Edit2:
I've solved it for now, by changing the last part to:
# Group each platform with its commands, then wrap the group in Dict so that
# as_dict() keys every platform section by its leading token.
platform_block = pp.Group(platform + pp.OneOrMore(command)("commands"))
expr = pp.Dict(platform_block)
grammar <<= (expr + OPT_SEP)[1, ...]
and I got the following Dict as output:
{'p1': {'platform': 'p1', 'commands': [{'cmd': 'start', 'tasks': [{'alias': 'a'}, {'alias': 'b', 'for': '30'}, {'alias': 'c'}, {'alias': 'd'}]}, {'cmd': 'stop', 'tasks': [{'alias': 'e'}]}]},
'p2': {'platform': 'p2', 'commands': [{'cmd': 'resume', 'tasks': [{'alias': 'f'}]}]}}
[['p1', ['start', [{'alias': 'a'}, [['alias', 'b'], ['for', '30']], {'alias': 'c'}, {'alias': 'd'}]], ['stop', [{'alias': 'e'}]]], ['p2', ['resume', [{'alias': 'f'}]]]]
Maybe I'll answer my own question and mark it as solved later. I realised that I need to update my parser to return a list of dictionaries instead of one large dict, because the order in which the parser output is processed is very important.
So, I'll answer my own question tested on the following input string:
p1 start a, {alias = b, for : 30}; c, d stop e; p2 resume f p3 start {name = g, at = 5}
import pyparsing as pp


def set_alias(t):
    """Parse action: wrap a bare alias token as ``{"alias": <token>}``."""
    return {"alias": t[0]}


def set_expr(t):
    """Parse action: append each matched platform section's dict to ``result``.

    Called once per platform section, so ``result`` ends up in input order.
    """
    result.extend(t.as_dict().values())


# Named `text` rather than `str` so the builtin is not shadowed.
text = "p1 start a, {alias = b, for : 30}; c, d stop e; p2 resume f p3 start {name = g, at = 5}"
# result will represent the output as a List of Dictionaries
result = []

grammar = pp.Forward()
SEP = pp.one_of(", ;")
EQ = pp.Suppress(pp.one_of(': ='))
LBRACE, RBRACE = map(pp.Suppress, "{}")
# Separators between tasks/sections are optional and dropped from the output.
OPT_SEP = pp.Suppress(pp.Opt(SEP))
CMD_KEYWORD = (
    pp.CaselessKeyword("start")
    | pp.CaselessKeyword("stop")
    | pp.CaselessKeyword("resume")
)("cmd")
platform = pp.one_of("p1 p2 p3")("platform")
# The negative lookahead keeps command verbs and platform names from being
# consumed as task aliases, so the task list ends where the next one starts.
alias = ~(CMD_KEYWORD | platform) + pp.Word(pp.alphanums)
prop = pp.Word(pp.alphanums)
value = pp.Word(pp.alphanums)
prop_value = pp.Dict(pp.Group(prop + EQ + value))
task_config = LBRACE + pp.Group(pp.delimitedList(prop_value, delim=SEP)) + RBRACE
task = task_config | alias.set_parse_action(set_alias)
command = pp.Group(CMD_KEYWORD + pp.Group(pp.OneOrMore(task + OPT_SEP))("tasks"))
# Dict(Group(...)) keys each platform section by its leading token in as_dict();
# the parse action additionally collects the sections into `result`.
expr = pp.Dict(pp.Group(platform + command[1, ...]("commands"))).set_parse_action(set_expr)
grammar <<= pp.OneOrMore(expr + OPT_SEP)

# Run the parser; the snippet previously printed `res` without ever assigning it.
res = grammar.parse_string(text)
print('\nDict = ', res.as_dict())
print('\n List of Dict = ', result)
print('\nList of Tokens =', res.as_list())
which results in:
Dict = {'p1': {'platform': 'p1', 'commands': [{'cmd': 'start', 'tasks': [{'alias': 'a'}, {'alias': 'b', 'for': '30'}, {'alias': 'c'}, {'alias': 'd'}]}, {'cmd': 'stop', 'tasks': [{'alias': 'e'}]}]}, 'p2': {'platform': 'p2', 'commands': [{'cmd': 'resume', 'tasks': [{'alias': 'f'}]}]}, 'p3': {'platform': 'p3', 'commands': [{'cmd': 'start', 'tasks': [{'name': 'g', 'at': '5'}]}]}}
List of Dict = [{'platform': 'p1', 'commands': [{'cmd': 'start', 'tasks': [{'alias': 'a'}, {'alias': 'b', 'for': '30'}, {'alias': 'c'}, {'alias': 'd'}]}, {'cmd': 'stop', 'tasks': [{'alias': 'e'}]}]}, {'platform': 'p2', 'commands': [{'cmd': 'resume', 'tasks': [{'alias': 'f'}]}]}, {'platform': 'p3', 'commands': [{'cmd': 'start', 'tasks': [{'name': 'g', 'at': '5'}]}]}]
List of Tokens = [['p1', ['start', [{'alias': 'a'}, [['alias', 'b'], ['for', '30']], {'alias': 'c'}, {'alias': 'd'}]], ['stop', [{'alias': 'e'}]]], ['p2', ['resume', [{'alias': 'f'}]]], ['p3', ['start', [[['name', 'g'], ['at', '5']]]]]]