python-3.xparsingiterations-expression

turn lisp command to nested lists in python


I am building a lisp parser in python 3.7.

Imagine I have this list program as a string

"(begin (define r 10) (* pi (* r r)))"

which I tokenize using:

def tokenize(string):
    return string.replace('(', ' ( ').replace(')', ' ) ').split()

returning

['(', 'begin', '(', 'define', 'r', '10', ')', '(', '*', 'pi', '(', '*', 'r', 'r', ')', ')', ')']

Now I am trying to build a function that reads from this list of tokens and returns this.

['begin', ['define', 'r', '10'], ['*', 'pi', ['*', 'r', 'r']]]

Any idea is welcome.


Solution

  • Here is (at last!) the programme i created using recursion.

    class LisParser:
        """class expecting a lisp program to recursively walk through."""
    
        def __init__(self):
            self.program = input(("Please provide lisp programme "
                                  "you want to pythonize"
                                  "(no need to pass the program as a string) : "))
            self.sub_program_sep = "("
            self.program_stack = self._tokenize_program()
            self.atom_is_int = self._is_int
            self.atom_is_flt = self._is_flt
            self.recursive_unpack = self.recursive_unpack
    
        def _tokenize_program(self):
            """func that splits a lisp program on white spaces ensuring parentheses tokenisation."""
    
            # if accidental multiple spaces, join method collapses them before padding parentheses before and after.
            tokens = " ".join(self.program.split()).replace('(', ' ( ').replace(')', ' ) ')
    
            # user might have inputted lisp program as python string i.e. "program". If so, get rid of double quotes.
            return (self.program.startswith('"')
                    and tokens.split()[1:-1]
                    or tokens.split())
    
        @staticmethod
        def _walk_stack(stack):
            """func returning the popped element at index 0 of the stack."""
            return stack.pop(0)
    
        @staticmethod
        def _is_flt(atom):
            """func trying to turn an atom to an float, else throws error."""
            try:
                float(atom)
                return True
            except ValueError:
                return False
    
        @staticmethod
        def _is_int(atom):
            """func trying to turn an atom to an int, else throws error."""
            try:
                int(atom)
                return True
            except ValueError:
                return False
    
        def _to_py_type(self, atom):
            """func that trying to an atom to an int, then a float and finally a string."""
            return ((self.atom_is_int(atom) and int(atom)) or
                    (self.atom_is_flt(atom) and float(atom)) or
                    str(atom))
    
        def recursive_unpack(self):
    
            # _walk_stack pops the first element off the stack.
            stack_head = self._walk_stack(stack=self.program_stack)
    
            # if token is an atom, convert to python type.
            if stack_head != self.sub_program_sep:
                return self._to_py_type(atom=stack_head)
    
            # "(" starts a sub_program, needs to be in its own unit (list).
            # The nested lists will represent the ast of the lisp program.
            elif stack_head == self.sub_program_sep:
                ast = list()
    
                # recursion base case is the end of the sub_program with ")".
                while self.program_stack[0] != ")":
                    ast.append(self.recursive_unpack())
    
                else:
                    # remove the closing parent, so that walk_atom() will return the atom, not the closing paren.
                    self.program_stack.remove(")")
    
                return ast
    
    
    if __name__ == '__main__':
    
        parser = LisParser()
    
        # when prompted, type "(first (list 1 (+ 2 3) 9))"
    
        var = parser.recursive_unpack()
    
        print(var)
    
        # ['first', ['list', 1, ['+', 2, 3], 9]]