~kf5jwc/imp-parser

9a694fa0d8b73f01beb051b54cdab8a2b000be09 — Kyle Jones 1 year, 5 months ago 4a8b0d0
I've done more type annotating, which.... helped?

This project has a confusing structure. For some reason the "generic" parser imports from the IMP-specific structures, which it should not. It also now needs things imported in a specific order, which really confuses me.
M ChangeLog => ChangeLog +3 -0
@@ 1,6 1,9 @@
CHANGES
=======

* I've done more type annotating, which.... helped?
* More typing!
* Package it!
* The imp parser works! Neato
* Define evaluators on our AST tree elements
* Splitting AST into logically named modules

D example_files/math.imp => example_files/math.imp +0 -1
@@ 1,1 0,0 @@
1 + 2 * 3

R example_files/hello.imp => examples/hello.imp +0 -0

A examples/if_then.imp => examples/if_then.imp +7 -0
@@ 0,0 1,7 @@
n := 5 + (2-5*3);
p := 1;
if n = 5 then
	x := 55
else
	m := 20
end

A examples/math.imp => examples/math.imp +1 -0
@@ 0,0 1,1 @@
n := 1 + 2 * 3

M imp_parser/ast/__init__.py => imp_parser/ast/__init__.py +1 -0
@@ 1,3 1,4 @@
from .types import Aexp, Bexp
from .boolean import BinopAexp, RelopBexp, AndBexp, OrBexp, NotBexp
from .integer import IntAexp, VarAexp
from .statement import AssignStatement, CompoundStatement, IfStatement, WhileStatement

M imp_parser/ast/boolean.py => imp_parser/ast/boolean.py +30 -29
@@ 1,16 1,17 @@
import attr
from dataclasses import dataclass
from .types import Aexp, Bexp
from ..parser import Exp
from typing import Any, Union


@attr.s
@dataclass
class BinopAexp(Aexp):
    op = attr.ib()
    left = attr.ib()
    right = attr.ib()
    op: str
    left: Aexp
    right: Aexp

    def eval(self, env):
        left_value = self.left.eval(env)
        right_value = self.right.eval(env)
    def eval(self, env: dict) -> int:
        left_value: int = self.left.eval(env)
        right_value: int = self.right.eval(env)

        if   self.op == '+':
            return left_value + right_value


@@ 19,22 20,22 @@ class BinopAexp(Aexp):
        elif self.op == '*':
            return left_value * right_value
        elif self.op == '/':
            return left_value / right_value
            return int(left_value / right_value)

        raise RuntimeError(f"Unknown operator: {self.op}")


@attr.s
@dataclass
class RelopBexp(Bexp):
    op = attr.ib()
    left = attr.ib()
    right = attr.ib()
    op: str
    left: Bexp
    right: Bexp

    def eval(self, env):
        left_value = self.left.eval(env)
        right_value = self.right.eval(env)
    def eval(self, env: dict) -> bool:
        left_value: int = self.left.eval(env)
        right_value: int = self.right.eval(env)

        if  self.op == '<':
        if   self.op == '<':
            return left_value <  right_value
        elif self.op == '<=':
            return left_value <= right_value


@@ 50,32 51,32 @@ class RelopBexp(Bexp):
        raise RuntimeError(f"Unknown operator: {self.op}")


@attr.s
@dataclass
class AndBexp(Bexp):
    left = attr.ib()
    right = attr.ib()
    left: Bexp
    right: Bexp

    def eval(self, env):
    def eval(self, env: dict) -> bool:
        left_value = self.left.eval(env)
        right_value = self.right.eval(env)
        return (left_value and right_value)


@attr.s
@dataclass
class OrBexp(Bexp):
    left = attr.ib()
    right = attr.ib()
    left: Bexp
    right: Bexp

    def eval(self, env):
    def eval(self, env: dict) -> bool:
        left_value = self.left.eval(env)
        right_value = self.right.eval(env)
        return (left_value or right_value)


@attr.s
@dataclass
class NotBexp(Bexp):
    exp = attr.ib()
    exp: Bexp

    def eval(self, env):
    def eval(self, env: dict) -> bool:
        value = self.exp.eval(env)
        return (not value)

M imp_parser/ast/integer.py => imp_parser/ast/integer.py +7 -7
@@ 1,20 1,20 @@
import attr
from dataclasses import dataclass
from .types import Aexp


@attr.s
@dataclass
class IntAexp(Aexp):
    i = attr.ib()
    i: int

    def eval(self, env) -> int:
    def eval(self, _: dict) -> int:
        return self.i


@attr.s
@dataclass
class VarAexp(Aexp):
    name = attr.ib()
    name: str

    def eval(self, env) -> int:
    def eval(self, env: dict) -> int:
        if self.name in env:
            return env[self.name]
        return 0

M imp_parser/ast/statement.py => imp_parser/ast/statement.py +22 -22
@@ 1,35 1,35 @@
import attr
from .types import Statement
from dataclasses import dataclass
from .types import Aexp, Statement
from .boolean import RelopBexp


@attr.s
@dataclass
class AssignStatement(Statement):
    name = attr.ib()
    aexp = attr.ib()
    name: str
    aexp: Aexp

    def eval(self, env):
    def eval(self, env: dict) -> None:
        value = self.aexp.eval(env)
        env[self.name] = value


@attr.s
@dataclass
class CompoundStatement(Statement):
    first = attr.ib()
    second = attr.ib()
    first: Statement
    second: Statement

    def eval(self, env):
    def eval(self, env: dict) -> None:
        self.first.eval(env)
        self.second.eval(env)


@attr.s
@dataclass
class IfStatement(Statement):
    condition = attr.ib()
    true_stmt = attr.ib()
    false_stmt = attr.ib()
    condition: RelopBexp
    true_stmt: Statement
    false_stmt: Statement

    def eval(self, env):
        condition_value = self.condition.eval(env)
    def eval(self, env: dict) -> None:
        condition_value: bool = self.condition.eval(env)
        if condition_value:
            self.true_stmt.eval(env)
        else:


@@ 37,13 37,13 @@ class IfStatement(Statement):
                self.false_stmt.eval(env)


@attr.s
@dataclass
class WhileStatement(Statement):
    condition = attr.ib()
    body = attr.ib()
    condition: RelopBexp
    body: CompoundStatement

    def eval(self, env):
    def eval(self, env: dict) -> None:
        condition_value = self.condition.eval(env)
        while condition_value:
            self.body.env(env)
            self.body.eval(env)
            condition_value = self.condition.eval(env)

M imp_parser/ast/types.py => imp_parser/ast/types.py +13 -3
@@ 1,3 1,7 @@
from typing import Union
from ..parser import Result


class Equality(object):
    def __eq__(self, other) -> bool:
        return isinstance(other, self.__class__) and \


@@ 9,13 13,19 @@ class Equality(object):

# "A"rithmetic "exp"ression
class Aexp(Equality):
    pass

    def eval(self, env: dict) -> int:
        raise NotImplementedError


# "B"oolean "exp"ression
class Bexp(Equality):
    pass

    def eval(self, env: dict) -> bool:
        raise NotImplementedError


class Statement(Equality):
    pass

    def eval(self, env: dict) -> None:
        raise NotImplementedError

M imp_parser/lexer/__init__.py => imp_parser/lexer/__init__.py +4 -4
@@ 1,13 1,13 @@
import sys
import re
from typing import List, Tuple, Type, Pattern
from typing import List, Tuple, Pattern
from enum import Enum
from . import exceptions
from .exceptions import NoMatchFound


# characters is an ~~iterable~~ indexable item which we can match against regex tokens, offered in token_exprs
def lex(characters, token_exprs) -> List[Tuple[str, Type[Enum]]]:
def lex(characters, token_exprs) -> List[Tuple[str, Enum]]:
    position = 0
    ret_tokens = []
    exprs = precompile_tokens(token_exprs)


@@ 22,8 22,8 @@ def lex(characters, token_exprs) -> List[Tuple[str, Type[Enum]]]:


# Compile all expressions so we avoid doing it for each token
def precompile_tokens(given_tokens) -> List[Tuple[Pattern[str], Type[Enum]]]:
    tokens = []
def precompile_tokens(given_tokens: List[Tuple[str, Enum]]) -> List[Tuple[Pattern[str], Enum]]:
    tokens: List[Tuple[Pattern[str], Enum]] = []
    for expr, tag in given_tokens:
        tokens.append((re.compile(expr), tag))
    return tokens

M imp_parser/parser/__init__.py => imp_parser/parser/__init__.py +2 -3
@@ 1,7 1,6 @@
from ..parser.combinators import Phrase
from .result import Result # It's weird, but this needs to be imported first-ish.
from .combinators import Phrase, Exp # This one needs to come second-ish?
from .statements import stmt_list
from . import combinators

from .arithmetic import aexp



M imp_parser/parser/arithmetic.py => imp_parser/parser/arithmetic.py +3 -3
@@ 1,6 1,6 @@
from functools import reduce
from typing import Callable, List, Union
from ..ast import IntAexp, BinopAexp, VarAexp
from ..ast import Aexp, IntAexp, BinopAexp, VarAexp
from ..imp_lexer import Tags
from .combinators import (
    Reserved,


@@ 23,7 23,7 @@ ID = Tag(Tags.ID)
AEXP_PRECEDENCE_LEVELS = [["*", "/"], ["+", "-"]]


def keyword(kw) -> Reserved:
def keyword(kw: str) -> Reserved:
    return Reserved(kw, Tags.Reserved)




@@ 47,7 47,7 @@ def aexp_term() -> Alternate:
    return aexp_value() | aexp_group()


def process_binop(op) -> Callable[[int, int], BinopAexp]:
def process_binop(op) -> Callable[[Aexp, Aexp], BinopAexp]:
    return lambda l, r: BinopAexp(op, l, r)



M imp_parser/parser/boolean.py => imp_parser/parser/boolean.py +4 -4
@@ 1,5 1,5 @@
from typing import Any, Callable, Union
from ..ast import AndBexp, NotBexp, OrBexp, RelopBexp
from typing import Any, Callable, Union, Tuple
from ..ast import Bexp, AndBexp, NotBexp, OrBexp, RelopBexp
from .arithmetic import aexp, any_operator_in_list, keyword, precedence, process_group
from .combinators import Alternate, Process, Lazy



@@ 8,7 8,7 @@ RELOPS = ["<", "<=", ">", ">=", "=", "!="]
BEXP_PRECEDENCE_LEVELS = [["and"], ["or"]]


def process_relop(parsed) -> RelopBexp:
def process_relop(parsed: Tuple[Tuple[Any, str], Any]) -> RelopBexp:
    ((left, op), right) = parsed
    return RelopBexp(op, left, right)



@@ 29,7 29,7 @@ def bexp_term() -> Alternate:
    return bexp_not() | bexp_relop() | bexp_group()


def process_logic(op) -> Callable[..., Union[AndBexp, OrBexp]]:
def process_logic(op: str) -> Callable[[Bexp, Bexp], Union[AndBexp, OrBexp]]:
    if op == "and":
        return lambda l, r: AndBexp(l, r)
    elif op == "or":

M imp_parser/parser/combinators.py => imp_parser/parser/combinators.py +46 -47
@@ 1,34 1,33 @@
import attr
from typing import Type, TypeVar, Union, Optional
from dataclasses import dataclass
from typing import Callable, Union, Optional, Any, Tuple
from .result import Result
from ..imp_lexer import Tags


class Parser(object):
    def __call__(self, tokens, pos) -> Optional[Union[Type["Parser"], Result]]:
        # subclasses should change this
        # I wonder if python has a way to warn about that
        pass
    def __call__(self: "Parser", tokens: BaseException, pos: int) -> Any:
        raise NotImplementedError

    # oooh, fun. Operator overloading can get confusing.
    def __add__(self, other) -> Concat:
    def __add__(self: "Parser", other: "Parser") -> "Concat":
        return Concat(self, other)

    def __mul__(self, other) -> Exp:
    def __mul__(self: "Parser", other: "Process") -> "Exp":
        return Exp(self, other)

    def __or__(self, other) -> Alternate:
    def __or__(self: "Parser", other: Union["Tag", "Parser"]) -> "Alternate":
        return Alternate(self, other)

    def __xor__(self, other) -> Process:
    def __xor__(self: "Parser", other: Callable) -> "Process":
        return Process(self, other)


@attr.s
@dataclass
class Reserved(Parser):
    value = attr.ib()
    tag = attr.ib()
    value: str
    tag: Tags

    def __call__(self, tokens, pos) -> Optional[Result]:
    def __call__(self, tokens, pos) -> Optional[Any]:
        if pos < len(tokens):
            (value, tag) = tokens[pos]
            if value == self.value and tag == self.tag:


@@ 37,11 36,11 @@ class Reserved(Parser):
        return None


@attr.s
@dataclass
class Tag(Parser):
    tag = attr.ib()
    tag: Tags

    def __call__(self, tokens, pos) -> Optional[Result]:
    def __call__(self, tokens, pos) -> Optional[Any]:
        if pos < len(tokens):
            (value, tag) = tokens[pos]
            if tag is self.tag:


@@ 50,12 49,12 @@ class Tag(Parser):
        return None


@attr.s
@dataclass
class Concat(Parser):
    left = attr.ib()
    right = attr.ib()
    left: Parser
    right: Parser

    def __call__(self, tokens, pos) -> Optional[Result]:
    def __call__(self, tokens, pos) -> Optional[Any]:
        left_result = self.left(tokens, pos)
        if left_result is None:
            return None


@@ 68,18 67,18 @@ class Concat(Parser):
        return Result(combined_result, right_result.pos)


@attr.s
@dataclass
class Exp(Parser):
    parser = attr.ib()
    separator = attr.ib()
    parser: Parser
    separator: "Process"

    def __call__(self, tokens, pos):
    def __call__(self, tokens, pos) -> Any:
        def process_next(parsed):
            (sepfunc, right) = parsed
            return sepfunc(result.value, right)

        result = self.parser(tokens, pos)
        next_parser = self.separator + self.parser ^ process_next
        next_parser: Process = self.separator + self.parser ^ process_next
        next_result = result

        while next_result:


@@ 90,32 89,32 @@ class Exp(Parser):
        return result


@attr.s
@dataclass
class Alternate(Parser):
    left = attr.ib()
    right = attr.ib()
    left: Parser
    right: Parser

    def __call__(self, tokens, pos) -> Optional[Type[Parser]]:
    def __call__(self, tokens, pos) -> Any:
        left_result = self.left(tokens, pos)
        if left_result is not None:
            return left_result
        return self.right(tokens, pos)


@attr.s
@dataclass
class Opt(Parser):
    parser = attr.ib()
    parser: Parser

    def __call__(self, tokens, pos) -> Union[Result, Type[Parser]]:
    def __call__(self, tokens, pos) -> Result:
        result = self.parser(tokens, pos)
        if result is not None:
            return result
        return Result(None, pos)


@attr.s
@dataclass
class Rep(Parser):
    parser = attr.ib()
    parser: Parser

    def __call__(self, tokens, pos) -> Result:
        results = []


@@ 129,34 128,34 @@ class Rep(Parser):
        return Result(results, pos)


@attr.s
@dataclass
class Process(Parser):
    parser = attr.ib()
    function = attr.ib()
    parser: Parser
    function: Callable

    def __call__(self, tokens, pos) -> Optional[Type[Parser]]:
        result = self.parser(tokens, pos)
    def __call__(self, tokens, pos) -> Optional[Result]:
        result: Optional[Result] = self.parser(tokens, pos)
        if result is not None:
            result.value = self.function(result.value)
        return result


@attr.s
@dataclass
class Lazy(Parser):
    parser_func = attr.ib()
    parser = None
    parser_func: Callable[..., Parser]
    parser: Optional[Parser] = None

    def __call__(self, tokens, pos) -> Type[Parser]:
    def __call__(self, tokens, pos) -> Any:
        if not self.parser:
            self.parser = self.parser_func()
        return self.parser(tokens, pos)


@attr.s
@dataclass
class Phrase(Parser):
    parser = attr.ib()
    parser: Parser

    def __call__(self, tokens, pos) -> Optional[Type[Parser]]:
    def __call__(self, tokens, pos) -> Optional[Any]:
        result = self.parser(tokens, pos)

        if result is not None:

M imp_parser/parser/result.py => imp_parser/parser/result.py +6 -7
@@ 1,9 1,8 @@
import attr
from dataclasses import dataclass
from typing import Sequence, Optional

@attr.s
class Result(object):
    value = attr.ib()
    pos = attr.ib()

    def __repr__(self) -> str:
        return f"Result({self.value}, {self.pos})"
@dataclass
class Result:
    value: Optional[Sequence[str]]
    pos: int

M imp_parser/parser/statements.py => imp_parser/parser/statements.py +6 -3
@@ 1,8 1,10 @@
from ..ast import AssignStatement, CompoundStatement, IfStatement, WhileStatement
from ..ast import AssignStatement, CompoundStatement, IfStatement, WhileStatement, RelopBexp
from .arithmetic import aexp, ID, keyword
from .boolean import bexp
from .combinators import Alternate, Exp, Lazy, Opt, Process

from typing import Tuple, Any


def assign_stmt() -> Process:
    def process(parsed):


@@ 18,9 20,10 @@ def stmt_list() -> Exp:


def if_stmt() -> Process:
    def process(parsed):
        # Okay, so at this point I *hate* how this combinator return structure is organized.
    # Okay, so at this point I *hate* how this combinator return structure is organized.
    def process(parsed: Tuple[Tuple[Tuple[Tuple[Tuple[str, RelopBexp], str], AssignStatement], Tuple[str, AssignStatement]], str]):
        (((((_, condition), _), true_stmt), false_parsed), _) = parsed

        if false_parsed:
            (_, false_stmt) = false_parsed
        else:

M requirements.txt => requirements.txt +0 -1
@@ 1,1 0,0 @@
attrs==18.*

M setup.cfg => setup.cfg +1 -1
@@ 3,4 3,4 @@ name = imp_parser
author = Kyle Jones
author-email = kyle@kf5jwc.us
summary = A simple IMP parser, following along with Jay Conrod's blogging
requires-python = >=3.5
requires-python = >=3.7

M tests/test_imp_interpreter.py => tests/test_imp_interpreter.py +4 -2
@@ 1,3 1,5 @@
#!/usr/bin/env python

import argparse
import sys



@@ 21,5 23,5 @@ if __name__ == '__main__':
    ast.eval(env)

    print("Final variable values:")
    for name in env:
        print(f" {name}: {env[name]}")
    for name, value in env.items():
        print(f" {name}: {value}")

M tests/test_imp_lex.py => tests/test_imp_lex.py +2 -0
@@ 1,3 1,5 @@
#!/usr/bin/env python

import sys
from imp_parser import imp_lex