~nch/python-compiler

dcc1d9a3df2aa60847586e118caa68d4a0c84bbb — nc 1 year, 7 months ago 7b5c5e3
fixed indent parsing a bit
2 files changed, 46 insertions(+), 16 deletions(-)

M compiler.py
M test_compiler.py
M compiler.py => compiler.py +18 -10
@@ 390,20 390,28 @@ def intersperse(p, delimp):
    """
    return convert(seq(p, many(seq(delimp, p))), lambda x: [x[0]] + sum(x[1], []))

# Parser factory for leading indentation: the returned parser consumes a run of
# space characters and compares how many it found against `stream.indent`.
#
# NOTE(review): two `def indentation` headers appear back to back and the first
# has no body. This text looks like a rendered diff with its +/- markers
# removed, so the zero-argument header and the `return ... INDENT/DEDENT` lines
# are presumably the removed side -- confirm against the repository.
def indentation():
# `expect` names the indentation change the caller requires; the values
# compared against it below are 'same', 'indent' and 'dedent'.
def indentation(expect='same'):
    # Zero or more literal spaces (assuming `many` repeats its argument parser).
    p = many(char(" "))
    def indentationf(stream):
        # `c` is the parse result (its length is used as the space count below);
        # `new_stream` is the stream after those spaces were consumed.
        c, new_stream = p(stream)
        if type(c) == ParseError:
            # Propagate the failure together with the original, unconsumed stream.
            return c, stream

        # Classify the measured width relative to the stream's current indent.
        # NOTE(review): read literally, each branch returns before its
        # `actual = ...` assignment runs, which would leave the `expect` check
        # further down unreachable -- see the note at the top of this block.
        if len(c) == stream.indent:
            return EMPTY, new_stream
            actual = 'same'
        elif len(c) > stream.indent:
            stream.indent = len(c)
            return INDENT, new_stream
            actual = 'indent'
        elif len(c) < stream.indent:
            stream.indent = len(c)
            return DEDENT, new_stream
            actual = 'dedent'
        else:
            # The three comparisons above are exhaustive for integer lengths.
            assert False

        if expect != actual:
            # Mismatch: report expected vs. actual level and hand back the
            # original stream.
            return ParseError(stream, f'indentation level: {expect}', f'indentation level: {actual}'), stream
        else:
            # Match: record the new width on the advanced stream and produce
            # EMPTY.
            new_stream.indent = len(c)
            return EMPTY, new_stream

    return indentationf

# we don't care about space, so we discard it


@@ 433,10 441,10 @@ if_stmt = seq('if', space, expr, discard(char(':')),
        'else', discard(char(':')),
        lambda x: block(x))

# NOTE(review): `stmt` and `block` are each assigned twice in this span; when
# executed top to bottom the later assignment is the one left bound. This text
# looks like a rendered diff, so the first pair is presumably the removed
# version -- confirm against the repository.
#
# <stmt> := <indentation> (<return-stmt-body> | <assign-stmt-body> | <if-stmt> | <expr>) <newline>
# NOTE(review): the rule lists <if-stmt>, but if_stmt is not among the
# alternatives passed to oneof() below.
stmt = convert(seq(discard(indentation()), oneof(return_stmt_body, assign_stmt_body, expr), newline), lambda x: x[0])
# <block> := newline, BLOCK_BEGIN (<stmt>)+ BLOCK_END
# NOTE(review): no BLOCK_BEGIN / BLOCK_END parser appears in the expression
# below; it is a discarded newline followed by one or more stmt.
block = convert(seq(discard(newline), one_or_more(stmt)), lambda x: x[0])
# <stmt> := (<return-stmt-body> | <assign-stmt-body> | <if-stmt> | <expr>) <newline>
# NOTE(review): as above, if_stmt is not passed to oneof() here.
stmt = convert(seq(oneof(return_stmt_body, assign_stmt_body, expr), newline), lambda x: x[0])
# <block> := <newline> (<indentation> <stmt>)+
# Each statement is preceded by indentation('indent'); the inner convert keeps
# element 0 of each (indentation, stmt) sequence result and the outer convert
# keeps element 0 of the whole sequence result.
block = convert(seq(newline, one_or_more(convert(seq(indentation('indent'), stmt), lambda x: x[0]))), lambda x: x[0])
# <function> := 'def' <space> <identifier> '(' (<identifier> (',' <space> <identifier>)*) ')' ':' <newline> <block>
# The parameter list uses intersperse() with a discarded ", " delimiter.
function = seq('def', space, identifier, char('('), intersperse(identifier, discard(seq(char(','), space))), char(')'), char(':'), block)


M test_compiler.py => test_compiler.py +28 -6
@@ 2,6 2,7 @@
import unittest
from compiler import *
from collections.abc import Iterable
import textwrap

class Var:
    def __init__(self, name):


@@ 123,13 124,34 @@ on some lines''')
        self.assertEqual(parse('l33t c0d3r', identifier), 'l33t')
        self.assertEqual(parse('val+34', identifier), 'val')

    def test_parse_block(self):
        # An identifier line, one deeper-indented line, then a line back at the
        # starting column.
        source = textwrap.dedent(
            '''\
            a
                b
            c''')
        grammar = seq(identifier, block)
        # The trailing 'c' line is not part of the expected parse result.
        expected = ['a', ['b']]
        self.assertEqual(parse(source, grammar), expected)

    def test_if(self):
        self.assertEqual(parse(
'''if true:
    print(2)
else:
    return 1
''', if_stmt), ['if', 'true', [['call', 'print', [2]]], 'else', [['return', 1]]])
        self.assertEqual(parse(textwrap.dedent(
                        '''\
                        if true:
                            print(2)
                            print(3)
                        else:
                            return 1
                            return 2
                        blah_blah()
                        '''), if_stmt),
                        ['if', 'true', [['call', 'print', [2]], ['call', 'print', [3]]], 'else', [['return', 1], ['return', 2]]])
        self.assertEqual(parse(textwrap.dedent(
                        '''\
                        if true:
                            print(1)
                            print(2)
                        else:
                            return 1
                        '''), if_stmt),
                        ['if', 'true', [['call', 'print', [1]], ['call', 'print', [2]]], 'else', [['return', 1]]])

    def test_block(self):
        b1 = Block('a', 'b', 'c')