~bl4ckb0ne/boson

5e950d498b48351c0d2ce07689e72aa9701dcdd3 — Simon Zeni 13 days ago de3cb87 master
rework lexer and parser
17 files changed, 784 insertions(+), 805 deletions(-)

M configure
M include/ast.h
M include/interpreter.h
M include/lexer.h
D include/parser.h
D include/token.h
A include/util.h
M meson.build
M src/ast.c
M src/builtin.c
M src/interpreter.c
M src/lexer.c
M src/ninja.c
M src/parser.c
M src/setup.c
D src/token.c
A src/util.c
M configure => configure +1 -1
@@ 7,6 7,7 @@ boson() {
	genrules boson \
		src/getopt_long.c \
		src/hash_table.c \
		src/util.c \
		src/log.c \
		src/ninja.c \
		src/options.c \


@@ 14,7 15,6 @@ boson() {
		src/builtin.c \
		src/interpreter.c \
		src/ast.c \
		src/token.c \
		src/lexer.c \
		src/parser.c \
		src/setup.c \

M include/ast.h => include/ast.h +26 -24
@@ 4,24 4,24 @@
#include <stddef.h>
#include <stdbool.h>

enum ast_expression_type {
	EXPRESSION_NONE,
	EXPRESSION_ASSIGNMENT,
	EXPRESSION_CONDITION,
	EXPRESSION_OR,
	EXPRESSION_AND,
	EXPRESSION_EQUALITY,
	EXPRESSION_RELATION,
	EXPRESSION_ADDITION,
	EXPRESSION_MULTIPLICATION,
	EXPRESSION_UNARY,
	EXPRESSION_SUBSCRIPT,
	EXPRESSION_FUNCTION,
	EXPRESSION_METHOD,
	EXPRESSION_IDENTIFIER,
	EXPRESSION_STRING,
	EXPRESSION_ARRAY,
	EXPRESSION_BOOL,
enum ast_type {
	AST_NONE,
	AST_ASSIGNMENT,
	AST_CONDITION,
	AST_OR,
	AST_AND,
	AST_EQUALITY,
	AST_RELATION,
	AST_ADDITION,
	AST_MULTIPLICATION,
	AST_UNARY,
	AST_SUBSCRIPT,
	AST_FUNCTION,
	AST_METHOD,
	AST_IDENTIFIER,
	AST_STRING,
	AST_ARRAY,
	AST_BOOL,
};

struct ast_expression;


@@ 46,7 46,7 @@ struct ast_identifier_list {
};

struct ast_expression_list {
	struct ast_expression **expressions;
	struct ast_expression **values;
	size_t n;
};



@@ 57,8 57,8 @@ struct ast_keyword_list {
};

struct ast_arguments {
	struct ast_expression_list *args;
	struct ast_keyword_list *kwargs;
	struct ast_expression_list *position;
	struct ast_keyword_list *keyword;
};

struct ast_subscript {


@@ 168,7 168,7 @@ struct ast_assignment {
};

struct ast_expression {
	enum ast_expression_type type;
	enum ast_type type;
	union {
		struct ast_assignment *assignment;
		struct ast_condition *condition;


@@ 186,7 186,7 @@ struct ast_expression {
		struct ast_string *string;
		struct ast_expression_list *array;
		struct ast_bool *boolean;
	} data;
	};
};

struct ast_selection {


@@ 209,7 209,9 @@ struct ast_statement {
		struct ast_expression *expression;
		struct ast_selection *selection;
		struct ast_iteration *iteration;
	} data;
	};
	struct ast_statement *next;
	struct ast_statement *prev;
};

const char *ast_expression_to_str(struct ast_expression *);

M include/interpreter.h => include/interpreter.h +2 -2
@@ 5,7 5,7 @@
#include <stddef.h>

struct options;
struct ast_root;
struct ast_statement;
struct ast_expression;
struct ast_function;
struct ast_string;


@@ 88,6 88,6 @@ struct object *eval_function(struct context *, struct ast_function *);

struct object *eval_expression(struct context *, struct ast_expression *);

struct context interpret_ast(struct ast_root *);
struct context interpret_ast(struct ast_statement *);

#endif // BOSON_INTERPRETER_H

M include/lexer.h => include/lexer.h +88 -4
@@ 3,17 3,101 @@

#include <stdio.h>

struct token;
struct ast_statement;

// Token kinds produced by lex().
//
// Keyword and operator enumerators are also used as indices into the
// spelling table `tokens` in src/lexer.c, so the two must stay in sync.
enum lex_token {
	// Keywords
	// Looked up by binary search over their spelling in src/lexer.c, so
	// they must stay sorted in strcmp() order.
	T_AND,
	T_BREAK,
	T_CONTINUE,
	T_ELIF,
	T_ELSE,
	T_ENDFOREACH,
	T_ENDIF,
	T_FALSE,
	T_FOREACH,
	T_IF,
	T_IN,
	T_NOT,
	T_OR,
	T_TRUE,
	// Alias marking the end of the keyword range; adds no new value.
	T_LAST_KEYWORD = T_TRUE,

	// Operators
	T_ASSIGN,
	T_COLON,
	T_COMMA,
	T_DOT,
	T_EQ,
	T_GEQ,
	T_GT,
	T_LBRACK,
	T_LCURL,
	T_LEQ,
	T_LPAREN,
	T_LT,
	T_MINEQ,
	T_MINUS,
	T_MODEQ,
	T_MODULO,
	T_NEQ,
	T_PLUS,
	T_PLUSEQ,
	T_QUESTION,
	T_RBRACK,
	T_RCURL,
	T_RPAREN,
	T_SLASH,
	T_SLASHEQ,
	T_STAR,
	T_STAREQ,
	// Alias marking the end of the operator range; adds no new value.
	T_LAST_OP = T_STAREQ,

	// Data
	// These kinds carry a payload in struct token (string or number).
	T_IDENTIFIER,
	T_NUMBER,
	T_STRING,

	// Magic
	T_EOF,
	T_EOL,
	// Also stored in lexer->prev to mean "no token has been pushed back".
	T_ERROR,

};

// Position in a source file, attached to every token for diagnostics.
struct location {
	// Path as given to lexer_init(); the string is not copied.
	const char *path;
	// lineno starts at 1. colno restarts at 0 after each newline and is
	// advanced by 1 per character read (2 for a tab).
	int lineno, colno;
};

// A single lexed token: where it was found, its kind, and its payload.
struct token {
	struct location loc;
	enum lex_token type;
	// Which member is meaningful depends on `type`.
	union {
		// Text of an identifier or string literal. `data` is heap
		// allocated by the lexer and released by token_finish().
		struct {
			size_t n;
			char *data;
		} string;
		// Value of a T_NUMBER token.
		int number;
	};
};

struct lexer {
	FILE *file;
	const char *path;
	int cur, line, col;
	struct location loc;
	struct token prev;
	int cur;
};

struct token *lexer_tokenize(struct lexer *);
enum lex_token lex(struct lexer *, struct token *);
enum lex_token unlex(struct lexer *, struct token *);
struct ast_statement *parse(struct lexer *);

void lexer_init(struct lexer *, const char *);
void lexer_finish(struct lexer *);
void token_finish(struct token *);

const char *token_str(const struct token *);
const char *lex_token_str(enum lex_token);

#endif // BOSON_LEXER_H

D include/parser.h => include/parser.h +0 -15
@@ 1,15 0,0 @@
#ifndef BOSON_PARSER_H
#define BOSON_PARSER_H

#include <stddef.h>

struct ast_statement;

struct ast_root {
	struct ast_statement **statements;
	size_t n;
};

struct ast_root parse(const char *);

#endif // BOSON_PARSER_H

D include/token.h => include/token.h +0 -66
@@ 1,66 0,0 @@
#ifndef BOSON_TOKEN_H
#define BOSON_TOKEN_H

#include <stddef.h>

enum token_type {
	TOKEN_EOF,
	TOKEN_EOL,
	TOKEN_LPAREN,
	TOKEN_RPAREN,
	TOKEN_LBRACK,
	TOKEN_RBRACK,
	TOKEN_LCURL,
	TOKEN_RCURL,
	TOKEN_DOT,
	TOKEN_COMMA,
	TOKEN_COLON,
	TOKEN_ASSIGN,
	TOKEN_PLUS,
	TOKEN_MINUS,
	TOKEN_STAR,
	TOKEN_SLASH,
	TOKEN_MODULO,
	TOKEN_PLUSEQ,
	TOKEN_MINEQ,
	TOKEN_STAREQ,
	TOKEN_SLASHEQ,
	TOKEN_MODEQ,
	TOKEN_EQ,
	TOKEN_NEQ,
	TOKEN_GT,
	TOKEN_GEQ,
	TOKEN_LT,
	TOKEN_LEQ,
	TOKEN_TRUE,
	TOKEN_FALSE,
	TOKEN_IF,
	TOKEN_ELSE,
	TOKEN_ELIF,
	TOKEN_ENDIF,
	TOKEN_AND,
	TOKEN_OR,
	TOKEN_NOT,
	TOKEN_QM,
	TOKEN_FOREACH,
	TOKEN_ENDFOREACH,
	TOKEN_IN,
	TOKEN_CONTINUE,
	TOKEN_BREAK,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_NUMBER,
};

struct token {
	enum token_type type;
	size_t n;
	char *data;
};

void token_destroy(struct token *);

const char *token_type_to_string(enum token_type);
const char *token_to_string(struct token *);

#endif // BOSON_TOKEN_H

A include/util.h => include/util.h +18 -0
@@ 0,0 1,18 @@
#ifndef BOSON_UTIL_H
#define BOSON_UTIL_H

#include <assert.h>
#include <stddef.h>

// Allocation wrappers to use in place of the standard allocator.
// NOTE(review): implemented in src/util.c, which is not visible here;
// presumably they abort on allocation failure -- confirm before relying on it.
void *xcalloc(size_t, size_t);
void *xrealloc(void *, size_t);

// Poison the standard allocation functions: once this header is included,
// any use of them expands to a static_assert(0, ...) and therefore fails to
// compile with the message given below.
#define malloc(a) (void *)sizeof(struct { static_assert(0, "Use xcalloc instead"); int _; });
#define calloc(a, b) (void *)sizeof(struct { static_assert(0, "Use xcalloc instead"); int _; });
#define realloc(a, b) (void *)sizeof(struct { static_assert(0, "Use xrealloc instead"); int _; });

#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

#endif // BOSON_UTIL_H

M meson.build => meson.build +1 -1
@@ 22,6 22,7 @@ add_project_arguments(
src = files(
	'src/getopt_long.c',
	'src/hash_table.c',
	'src/util.c',
	'src/log.c',
	'src/ninja.c',
	'src/options.c',


@@ 29,7 30,6 @@ src = files(
	'src/builtin.c',
	'src/interpreter.c',
	'src/ast.c',
	'src/token.c',
	'src/lexer.c',
	'src/parser.c',
	'src/setup.c',

M src/ast.c => src/ast.c +17 -17
@@ 6,23 6,23 @@ ast_expression_to_str(struct ast_expression *expression)
{
#define TRANSLATE(e) case e: return #e;
	switch (expression->type) {
	TRANSLATE(EXPRESSION_NONE);
	TRANSLATE(EXPRESSION_ASSIGNMENT);
	TRANSLATE(EXPRESSION_CONDITION);
	TRANSLATE(EXPRESSION_OR);
	TRANSLATE(EXPRESSION_AND);
	TRANSLATE(EXPRESSION_EQUALITY);
	TRANSLATE(EXPRESSION_RELATION);
	TRANSLATE(EXPRESSION_ADDITION);
	TRANSLATE(EXPRESSION_MULTIPLICATION);
	TRANSLATE(EXPRESSION_UNARY);
	TRANSLATE(EXPRESSION_SUBSCRIPT);
	TRANSLATE(EXPRESSION_FUNCTION);
	TRANSLATE(EXPRESSION_METHOD);
	TRANSLATE(EXPRESSION_IDENTIFIER);
	TRANSLATE(EXPRESSION_STRING);
	TRANSLATE(EXPRESSION_ARRAY);
	TRANSLATE(EXPRESSION_BOOL);
	TRANSLATE(AST_NONE);
	TRANSLATE(AST_ASSIGNMENT);
	TRANSLATE(AST_CONDITION);
	TRANSLATE(AST_OR);
	TRANSLATE(AST_AND);
	TRANSLATE(AST_EQUALITY);
	TRANSLATE(AST_RELATION);
	TRANSLATE(AST_ADDITION);
	TRANSLATE(AST_MULTIPLICATION);
	TRANSLATE(AST_UNARY);
	TRANSLATE(AST_SUBSCRIPT);
	TRANSLATE(AST_FUNCTION);
	TRANSLATE(AST_METHOD);
	TRANSLATE(AST_IDENTIFIER);
	TRANSLATE(AST_STRING);
	TRANSLATE(AST_ARRAY);
	TRANSLATE(AST_BOOL);
	default:
		report("unknown token");
		break;

M src/builtin.c => src/builtin.c +45 -45
@@ 1,8 1,11 @@
#define _XOPEN_SOURCE 700

#include "ast.h"
#include "interpreter.h"
#include "options.h"
#include "log.h"


#include <assert.h>
#include <stdlib.h>
#include <stddef.h>


@@ 18,17 21,17 @@ static struct object *
add_project_arguments(struct context *ctx, struct ast_arguments *args)
{
	const char *language = NULL;
	for (size_t i = 0; i < args->kwargs->n; ++i) {
		const char *key = args->kwargs->keys[i]->data;
		struct ast_expression *value = args->kwargs->values[i];
	for (size_t i = 0; i < args->keyword->n; ++i) {
		const char *key = args->keyword->keys[i]->data;
		struct ast_expression *value = args->keyword->values[i];
		if (strcmp(key, "language") == 0) {
			if (value->type != EXPRESSION_STRING) {
			if (value->type != AST_STRING) {
				fatal("language must be a string");
			}
			if (language) {
				fatal("language has already been specified");
			}
			language = value->data.string->data;
			language = value->string->data;
		} else {
			fatal("invalid keyword argument '%s'", key);
		}


@@ 40,8 43,8 @@ add_project_arguments(struct context *ctx, struct ast_arguments *args)
		fatal("language '%s' is not supported", language);
	}

	for (size_t i = 0; i < args->args->n; ++i) {
		struct ast_expression *expr = args->args->expressions[i];
	for (size_t i = 0; i < args->position->n; ++i) {
		struct ast_expression *expr = args->position->values[i];
		struct object *obj = eval_expression(ctx, expr);

		if (obj->type != OBJECT_TYPE_STRING) {


@@ 73,7 76,7 @@ add_project_arguments(struct context *ctx, struct ast_arguments *args)
static struct object *
executable(struct context *ctx, struct ast_arguments *args)
{
	if (args->args->n != 2) {
	if (args->position->n != 2) {
		fatal("function 'executable' requires at least 2 arguments");
	}



@@ 83,16 86,15 @@ executable(struct context *ctx, struct ast_arguments *args)
	}
	target->type = BUILD_TARGET_EXECUTABLE;

	struct object *name = eval_expression(ctx, args->args->expressions[0]);
	struct object *name = eval_expression(ctx, args->position->values[0]);
	if (name->type != OBJECT_TYPE_STRING) {
		fatal("executable name must be a string");
	}

	target->name.n = name->string.n;
	target->name.data = calloc(target->name.n, sizeof(char));
	strncpy(target->name.data, name->string.data, target->name.n);
	target->name.data = strdup(name->string.data);

	struct object *sources = eval_expression(ctx, args->args->expressions[1]);
	struct object *sources = eval_expression(ctx, args->position->values[1]);
	if (sources->type == OBJECT_TYPE_STRING) {
		fatal("todo handle single source file");
	} else if (sources->type == OBJECT_TYPE_ARRAY) {


@@ 111,10 113,10 @@ executable(struct context *ctx, struct ast_arguments *args)
		fatal("sources must be either a string or a list of string");
	}

	for (size_t i = 0; i < args->kwargs->n; ++i) {
		const char *key = args->kwargs->keys[i]->data;
	for (size_t i = 0; i < args->keyword->n; ++i) {
		const char *key = args->keyword->keys[i]->data;
		struct object *value = eval_expression(ctx,
				args->kwargs->values[i]);
				args->keyword->values[i]);
		if (strcmp(key, "include_directories") == 0) {
			assert(value->type == OBJECT_TYPE_ARRAY);
			target->include.n = value->array.n;


@@ 147,7 149,7 @@ executable(struct context *ctx, struct ast_arguments *args)
static struct object *
files(struct context *ctx, struct ast_arguments *args)
{
	if (args->kwargs->n != 0) {
	if (args->keyword->n != 0) {
		fatal("function 'files' takes no keyword arguments");
	}



@@ 161,12 163,12 @@ files(struct context *ctx, struct ast_arguments *args)

	files->type = OBJECT_TYPE_ARRAY;

	for (size_t i = 0; i < args->args->n; ++i) {
		struct ast_expression *expr = args->args->expressions[i];
		if (expr->type != EXPRESSION_STRING) {
	for (size_t i = 0; i < args->position->n; ++i) {
		struct ast_expression *expr = args->position->values[i];
		if (expr->type != AST_STRING) {
			fatal("function 'files' takes only string arguments");
		}
		struct object *file = eval_string(expr->data.string);
		struct object *file = eval_string(expr->string);

		const size_t files_size = files->array.n + 1;
		files->array.objects = realloc(files->array.objects,


@@ 182,7 184,7 @@ files(struct context *ctx, struct ast_arguments *args)
static struct object *
include_directories(struct context *ctx, struct ast_arguments *args)
{
	if (args->kwargs->n != 0) {
	if (args->keyword->n != 0) {
		fatal("function 'include_directories' takes no keyword arguments");
	}



@@ 196,12 198,13 @@ include_directories(struct context *ctx, struct ast_arguments *args)

	includes->type = OBJECT_TYPE_ARRAY;

	for (size_t i = 0; i < args->args->n; ++i) {
		struct ast_expression *expr = args->args->expressions[i];
		if (expr->type != EXPRESSION_STRING) {
	for (size_t i = 0; i < args->position->n; ++i) {
		struct ast_expression *expr = args->position->values[i];
		if (expr->type != AST_STRING) {
			fatal("function 'files' takes only string arguments");
		}
		struct object *path = eval_string(expr->data.string);
		struct object *path = eval_string(expr->string);
		info("include path is %s", path->string.data);

		char abs_path[PATH_MAX] = {0};
		snprintf(abs_path, PATH_MAX, "%s/%s", cwd, path->string.data);


@@ 227,56 230,53 @@ include_directories(struct context *ctx, struct ast_arguments *args)
static struct object *
project(struct context *ctx, struct ast_arguments *args)
{
	if (args->args->expressions[0]->type != EXPRESSION_STRING) {
	if (args->position->values[0]->type != AST_STRING) {
		fatal("project: first argument must be a string literal");
	}

	if (args->args->expressions[1]->type != EXPRESSION_STRING) {
	if (args->position->values[1]->type != AST_STRING) {
		fatal("project: second argument must be a string literal");
	}

	const char *language = args->args->expressions[1]->data.string->data;
	const char *language = args->position->values[1]->string->data;
	if (strcmp(language, "c") != 0) {
		fatal("project: %s language not supported", language);
	}

	for (size_t i = 0; i < args->kwargs->n; ++i) {
		const char *key = args->kwargs->keys[i]->data;
		struct ast_expression *value = args->kwargs->values[i];
	for (size_t i = 0; i < args->keyword->n; ++i) {
		const char *key = args->keyword->keys[i]->data;
		struct ast_expression *value = args->keyword->values[i];
		if (strcmp(key, "version") == 0) {
			if (value->type != EXPRESSION_STRING) {
			if (value->type != AST_STRING) {
				fatal("version must be a string");
			}
			if (ctx->version.data) {
				fatal("version has already been specified");
			}
			ctx->version.data = calloc(value->data.string->n,
					sizeof(char));
			strncpy(ctx->version.data, value->data.string->data,
					value->data.string->n);
			ctx->version.n = value->data.string->n;
			ctx->version.data = strdup(value->string->data);
			ctx->version.n = value->string->n;
		} else if (strcmp(key, "license") == 0) {
			if (value->type == EXPRESSION_ARRAY) {
			if (value->type == AST_ARRAY) {
				fatal("multiple licenses not supported");
			} else if (value->type != EXPRESSION_STRING) {
			} else if (value->type != AST_STRING) {
				fatal("license must be a string");
			}
		} else if (strcmp(key, "default_options") == 0) {
			if (value->type != EXPRESSION_ARRAY) {
			if (value->type != AST_ARRAY) {
				fatal("default_options must be an array");
			}

			for(size_t j = 0; j < value->data.array->n; ++j) {
			for(size_t j = 0; j < value->array->n; ++j) {
				struct ast_expression *option =
					value->data.array->expressions[j];
					value->array->values[j];

				if (option->type != EXPRESSION_STRING) {
				if (option->type != AST_STRING) {
					fatal("option must be a string");
				}

				char k[32] = {0}, v[32] = {0};
				sscanf(option->data.string->data, "%32[^=]=%s",
						k, v);
				sscanf(option->string->data, "%32[^=]=%s", k,
						v);
				if (!options_parse(ctx->options, k, v)) {
					fatal("failed to parse option '%s=%s'",
							k, v);

M src/interpreter.c => src/interpreter.c +29 -30
@@ 1,5 1,4 @@
#include "interpreter.h"
#include "parser.h"
#include "options.h"
#include "ast.h"
#include "hash_table.h"


@@ 49,14 48,14 @@ static void
string_format(struct context *ctx, struct object *object,
		struct ast_arguments *arguments)
{
	if (arguments->kwargs->n != 0) {
	if (arguments->keyword->n != 0) {
		fatal("string format doesn't support kwargs");
	}

	char *fmt = object->string.data;
	for(size_t i = 0; i < arguments->args->n; ++i) {
	for(size_t i = 0; i < arguments->position->n; ++i) {
		struct object *arg = eval_expression(ctx,
				arguments->args->expressions[i]);
				arguments->position->values[i]);
		const char *str_arg = object_to_str(arg);

		char needle[8] = {0};


@@ 106,7 105,7 @@ eval_string_method(struct context *ctx, struct ast_method *method)
static struct object *
eval_identifier_method(struct context *ctx, struct ast_method *method)
{
	struct ast_identifier *id = method->left->data.identifier;
	struct ast_identifier *id = method->left->identifier;
	struct object *obj = NULL;
	if (strcmp(id->data, "meson") == 0) {
		obj = eval_meson_object(ctx, method->right);


@@ 122,10 121,10 @@ eval_method(struct context *ctx, struct ast_method *method)
	struct object *obj = NULL;
	struct ast_expression *expr = method->left;
	switch (expr->type) {
	case EXPRESSION_STRING:
	case AST_STRING:
		obj = eval_string_method(ctx, method);
		break;
	case EXPRESSION_IDENTIFIER:
	case AST_IDENTIFIER:
		obj = eval_identifier_method(ctx, method);
		break;
	default:


@@ 174,7 173,7 @@ eval_array(struct context *ctx, struct ast_expression_list *array)
	obj->array.objects = calloc(array->n, sizeof(struct object*));

	for (size_t i = 0; i < array->n; ++i) {
		struct object *item = eval_expression(ctx, array->expressions[i]);
		struct object *item = eval_expression(ctx, array->values[i]);
		if (!item) {
			fatal("array item at %zu is empty", i);
		}


@@ 189,23 188,23 @@ eval_expression(struct context *ctx, struct ast_expression *expression)
{
	struct object *obj = NULL;
	switch (expression->type) {
	case EXPRESSION_FUNCTION:
		obj = eval_function(ctx, expression->data.function);
	case AST_FUNCTION:
		obj = eval_function(ctx, expression->function);
		break;
	case EXPRESSION_METHOD:
		obj = eval_method(ctx, expression->data.method);
	case AST_METHOD:
		obj = eval_method(ctx, expression->method);
		break;
	case EXPRESSION_STRING:
		obj = eval_string(expression->data.string);
	case AST_STRING:
		obj = eval_string(expression->string);
		break;
	case EXPRESSION_ASSIGNMENT:
		obj = eval_assignment(ctx, expression->data.assignment);
	case AST_ASSIGNMENT:
		obj = eval_assignment(ctx, expression->assignment);
		break;
	case EXPRESSION_IDENTIFIER:
		obj = eval_identifier(ctx, expression->data.identifier);
	case AST_IDENTIFIER:
		obj = eval_identifier(ctx, expression->identifier);
		break;
	case EXPRESSION_ARRAY:
		obj = eval_array(ctx, expression->data.array);
	case AST_ARRAY:
		obj = eval_array(ctx, expression->array);
		break;
	default:
		fatal("todo handle expression %s",


@@ 223,12 222,12 @@ check_first(struct ast_statement *statement)
		goto check_first_err;
	}

	struct ast_expression *expr = statement->data.expression;
	if (expr->type != EXPRESSION_FUNCTION) {
	struct ast_expression *expr = statement->expression;
	if (expr->type != AST_FUNCTION) {
		goto check_first_err;
	}

	if (strcmp(expr->data.function->left->data, "project") != 0) {
	if (strcmp(expr->function->left->data, "project") != 0) {
		goto check_first_err;
	}



@@ 239,27 238,27 @@ check_first_err:
}

struct context
interpret_ast(struct ast_root *root)
interpret_ast(struct ast_statement *root)
{
	struct context ctx = {0};

	ctx.options = options_create();
	ctx.env = hash_table_create(8u);

	check_first(root->statements[0]);
	check_first(root);

	for (size_t i = 0; i < root->n - 1; ++i) {
		struct ast_statement *statement = root->statements[i];

		switch(statement->type) {
	struct ast_statement *curr = root;
	while (curr) {
		switch(curr->type) {
		case STATEMENT_EXPRESSION:
			eval_expression(&ctx, statement->data.expression);
			eval_expression(&ctx, curr->expression);
			break;
		case STATEMENT_SELECTION:
		case STATEMENT_ITERATION:
		default:
			fatal("unknown statement");
		}
		curr = curr->next;
	}

	return ctx;

M src/lexer.c => src/lexer.c +234 -203
@@ 1,273 1,304 @@
#include "lexer.h"
#include "token.h"
#include "log.h"
#define _XOPEN_SOURCE 700

#include <stdbool.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "lexer.h"
#include "log.h"
#include "util.h"

// Spellings of the keywords and operators, indexed by enum lex_token.
// Order must match lexer.h. The keyword entries (indices 0 through
// T_LAST_KEYWORD) are searched with bsearch() in identifier(), so they must
// also stay sorted in strcmp() order.
static const char *tokens[] = {
	// Keywords
	[T_AND] = "and",
	[T_BREAK] = "break",
	[T_CONTINUE] = "continue",
	[T_ELIF] = "elif",
	[T_ELSE] = "else",
	[T_ENDFOREACH] = "endforeach",
	[T_ENDIF] = "endif",
	[T_FALSE] = "false",
	[T_FOREACH] = "foreach",
	[T_IF] = "if",
	[T_IN] = "in",
	[T_NOT] = "not",
	[T_OR] = "or",
	[T_TRUE] = "true",

	// Operators
	[T_ASSIGN] = "=",
	[T_COLON] = ":",
	[T_COMMA] = ",",
	[T_DOT] = ".",
	[T_EQ] = "==",
	[T_GEQ] = ">=",
	[T_GT] = ">",
	[T_LBRACK] = "[",
	[T_LCURL] = "{",
	[T_LEQ] = "<=",
	[T_LPAREN] = "(",
	[T_LT] = "<",
	[T_MINEQ] = "-=",
	[T_MINUS] = "-",
	[T_MODEQ] = "%=",
	[T_MODULO] = "%",
	[T_NEQ] = "!=",
	[T_PLUS] = "+",
	[T_PLUSEQ] = "+=",
	[T_QUESTION] = "?",
	[T_RBRACK] = "]",
	[T_RCURL] = "}",
	[T_RPAREN] = ")",
	[T_SLASH] = "/",
	[T_SLASHEQ] = "/=",
	[T_STAR] = "*",
	[T_STAREQ] = "*=",
};

static int
next(struct lexer *lexer)
next(struct lexer *lexer, struct location *loc)
{
	if (lexer->cur == '\n') {
		lexer->line++;
		lexer->col = 1;
	int c = getc(lexer->file);
	if (c == '\n') {
		lexer->loc.lineno++;
		lexer->loc.colno = 0;
	} else if (c == '\t') {
		lexer->loc.colno += 2;
	} else {
		lexer->col++;
		lexer->loc.colno++;
	}

	lexer->cur = getc(lexer->file);
	return lexer->cur;
	if (loc) {
		loc->path = lexer->loc.path;
		loc->lineno = lexer->loc.lineno;
		loc->colno = lexer->loc.colno;
	}

	lexer->cur = c;
	return c;
}

static int
peek(struct lexer *lexer)
push_back(struct lexer *lexer)
{
	int c = getc(lexer->file);
	ungetc(c, lexer->file);
	int c = ungetc(lexer->cur, lexer->file);
	lexer->cur = 0;
	return c;
}

static void
comment(struct lexer *lexer)
static int
cmp_keyword(const void *lhs, const void *rhs)
{
	if (lexer->cur != '#') {
		return;
	}

	do {
		next(lexer);
	} while (lexer->cur != '\n');
	next(lexer);
	return strcmp((char *)lhs, *(const char **)rhs);
}

static bool
keyword(struct lexer *lexer, struct token *token)
static enum lex_token
identifier(struct lexer *lexer, struct token *out)
{
	/* must stay in sorted order */
	static const struct {
		const char *name;
		enum token_type type;
	} keywords[] = {
		{"and", TOKEN_AND},
		{"break", TOKEN_BREAK},
		{"continue", TOKEN_CONTINUE},
		{"elif", TOKEN_ELIF},
		{"else", TOKEN_ELSE},
		{"endforeach", TOKEN_ENDFOREACH},
		{"endif", TOKEN_ENDIF},
		{"false", TOKEN_FALSE},
		{"foreach", TOKEN_FOREACH},
		{"if", TOKEN_IF},
		{"in", TOKEN_IN},
		{"not", TOKEN_NOT},
		{"or", TOKEN_OR},
		{"true", TOKEN_TRUE},
	};
	size_t n = 1;
	char buf[256] = {0};
	buf[n - 1] = lexer->cur;

	int low = 0, high = (sizeof(keywords) / sizeof(keywords[0])) - 1, mid, cmp;

	while (low <= high) {
		mid = (low + high) / 2;
		cmp = strcmp(token->data, keywords[mid].name);
		if (cmp == 0) {
			token->type = keywords[mid].type;
			return true;
		}

		if (cmp < 0) {
			high = mid - 1;
		} else {
			low = mid + 1;
		}
	int c;
	while ((c = next(lexer, NULL)) && (isalnum(c) || c == '_')) {
		buf[n++] = c;
	}

	return false;
}

static struct token *
identifier(struct lexer *lexer)
{
	struct token *token = calloc(1, sizeof(struct token));
	push_back(lexer);

	size_t n = 1;
	char *id = calloc(n, sizeof(char));
	if (id == NULL) {
		fatal("failed to allocate buffer for identifier");
	}
	void *token = bsearch(buf, tokens, T_LAST_KEYWORD + 1,
			sizeof(tokens[0]), cmp_keyword);

	id[n - 1] = lexer->cur;
	while (isalnum(next(lexer)) || lexer->cur == '_') {
		id = realloc(id, ++n * sizeof(char));
		id[n - 1] = (char)lexer->cur;
	}
	id = realloc(id, ++n * sizeof(char));
	id[n - 1] = '\0';
	out->string.data = strdup(buf);
	out->string.n = n;

	token->data = id;
	token->n = n;

	if (!keyword(lexer, token)) {
		token->type = TOKEN_IDENTIFIER;
	if (token) {
		out->type = (const char **)token - tokens;
	} else {
		out->type = T_IDENTIFIER;
	}

	return token;
	return out->type;
}

static struct token *
number(struct lexer *lexer)
static enum lex_token
number(struct lexer *lexer, struct token *out)
{
	fatal("todo number");

	struct token *token = calloc(1, sizeof(struct token));
	token->type = TOKEN_NUMBER;

	/* FIXME handle octal */
	/* FIXME handle hexadecimal */
	/*
	if (lexer->cur == '0') {
		if (peek(lexer) == 'o')
		else if (peek(lexer) == 'x')
	}
	*/

	return token;
	fatal("number");
	return 0;
}

static struct token *
string(struct lexer *lexer)
static enum lex_token
string(struct lexer *lexer, struct token *out)
{
	struct token *token = calloc(1, sizeof(struct token));
	token->type = TOKEN_STRING;

	while(lexer->cur == '\'') {
		next(lexer);
		next(lexer, NULL);
	}

	size_t n = 1;
	char *id = calloc(n, sizeof(char));
	if (id == NULL) {
		fatal("failed to allocate buffer for identifier");
	}
	char buf[PATH_MAX] = {0};
	buf[n - 1] = lexer->cur;

	id[n - 1] = lexer->cur;
	while (next(lexer) != '\'') {
		id = realloc(id, ++n * sizeof(char));
		id[n - 1] = (char)lexer->cur;
	int c;
	while ((c = next(lexer, NULL)) != '\'') {
		buf[n++] = c;
	}
	id = realloc(id, ++n * sizeof(char));
	id[n - 1] = '\0';

	token->data = id;
	token->n = n;
	out->type = T_STRING;
	out->string.data = strdup(buf);
	out->string.n = n;

	while(lexer->cur == '\'') {
		next(lexer);
	}
	while((c = next(lexer, NULL)) == '\'');

	return token;
	push_back(lexer);

	return out->type;
}

struct token *
lexer_tokenize(struct lexer *lexer)
enum lex_token
lex(struct lexer *lexer, struct token *out)
{
	while (isspace(lexer->cur)) {
		if (lexer->cur == '\n') {
			struct token *token = calloc(1, sizeof(struct token));
			if (!token) {
				fatal("failed to allocate token");
			}
			token->type = TOKEN_EOL;
			next(lexer);
			return token;
		}
		next(lexer);
	if (lexer->prev.type != T_ERROR) {
		*out = lexer->prev;
		lexer->prev.type = T_ERROR;
		return out->type;
	}

	if (lexer->cur == '#') {
		comment(lexer);
	}
	int c;
	while ((c = next(lexer, &out->loc)) && isspace(c));

	if (isalnum(lexer->cur) || lexer->cur == '_') {
		return identifier(lexer);
	} else if (isdigit(lexer->cur)) {
		return number(lexer);
	} else if (lexer->cur == '\'') {
		return string(lexer);
	if (c == '#') {
		while ((c = next(lexer, NULL)) != '\n');
		c = next(lexer, &out->loc);
	}

	struct token *token = calloc(1, sizeof(struct token));
	switch(lexer->cur) {
		case '(':
			token->type = TOKEN_LPAREN;
			break;
		case ')':
			token->type = TOKEN_RPAREN;
			break;
		case '[':
			token->type = TOKEN_LBRACK;
			break;
		case ']':
			token->type = TOKEN_RBRACK;
			break;
		case '{':
			token->type = TOKEN_LCURL;
			break;
		case '}':
			token->type = TOKEN_RCURL;
			break;
		case '.':
			token->type = TOKEN_DOT;
			break;
		case ',':
			token->type = TOKEN_COMMA;
			break;
		case ':':
			token->type = TOKEN_COLON;
			break;
		// arithmetic
		case '+':
			if (peek(lexer) == '=') {
				next(lexer);
				token->type = TOKEN_PLUS;
			} else {
				token->type = TOKEN_PLUS;
			}
			break;
		case '-':
			token->type = TOKEN_MINUS;
			break;
		case '=':
			token->type = TOKEN_ASSIGN;
			break;
		case '\0':
		default:
			token->type = TOKEN_EOF;
			break;
	if (isalpha(c) || c == '_'){
		return identifier(lexer, out);
	} else if (isdigit(c)) {
		return number(lexer, out);
	}
	next(lexer);
	return token;

	switch (c) {
	case '=':
		out->type = T_ASSIGN;
		break;
	case ',':
		out->type = T_COMMA;
		break;
	case ':':
		out->type = T_COLON;
		break;
	case '.':
		out->type = T_DOT;
		break;
	case '[':
		out->type = T_LBRACK;
		break;
	case '(':
		out->type = T_LPAREN;
		break;
	case ']':
		out->type = T_RBRACK;
		break;
	case ')':
		out->type = T_RPAREN;
		break;
	case '\'':
		out->type = string(lexer, out);
		break;
	case -1:
	case '\0':
		out->type = T_EOF;
		break;
	default:
		fatal("TODO lex '%c' at %s:%d:%d", c, out->loc.path,
				out->loc.lineno, out->loc.colno);
		break;
	};

	return out->type;
}

/*
 * Push a token back so that the next call to lex() returns it again.
 *
 * Only one token of lookahead is supported: the pushback slot must be
 * empty (marked with T_ERROR) when this is called. The token is copied by
 * value, so any string payload is now shared with the slot.
 *
 * Returns the type of the token that was pushed back.
 */
enum lex_token
unlex(struct lexer *lexer, struct token *in)
{
	struct token *slot = &lexer->prev;

	assert(slot->type == T_ERROR);
	*slot = *in;

	return slot->type;
}

void
lexer_init(struct lexer *lexer, const char *path)
{
	memset(lexer, 0, sizeof(struct lexer));

	lexer->file = fopen(path, "r");
	if (lexer->file == NULL) {
		fatal("Failed to open %s", path);
		fatal("Failed to open file '%s'", path);
	}

	lexer->path = path;
	lexer->line = 1;
	lexer->col = 1;
	lexer->prev.type = T_ERROR;

	next(lexer);
	lexer->loc.path = path;
	lexer->loc.lineno = 1;
	lexer->loc.colno = 0;
}

/*
 * Release the resources held by a lexer set up with lexer_init(): close the
 * input stream and reset every field to zero.
 */
void
lexer_finish(struct lexer *lexer)
{
	FILE *stream = lexer->file;

	fclose(stream);
	memset(lexer, 0, sizeof(*lexer));
}

/*
 * Release the payload owned by a token and reset it.
 *
 * Identifier and string tokens own a heap-allocated copy of their text; it
 * is freed here and the payload is cleared so the token never keeps a
 * dangling pointer. Other token kinds own nothing.
 *
 * NOTE(review): identifier() also strdup()s the text of keyword tokens,
 * which this function does not free -- confirm whether that is intended.
 */
void
token_finish(struct token *token)
{
	if (token->type == T_IDENTIFIER || token->type == T_STRING) {
		free(token->string.data);
		/* Do not leave a stale pointer behind for later inspection. */
		token->string.data = NULL;
		token->string.n = 0;
	}

	/* 0 is the first enumerator (T_AND), not a dedicated "none" value. */
	token->type = 0;
	token->loc.path = NULL;
	token->loc.lineno = 0;
	token->loc.colno = 0;
}

const char *
token_str(const struct token *token)
{
	static char buf[256] = {0};
	switch(token->type) {
	case T_IDENTIFIER:
		snprintf(buf, sizeof(buf), "identifier '%s'", token->string.data);
		break;
	case T_NUMBER:
		snprintf(buf, sizeof(buf), "number '%d'", token->number);
		break;
	case T_STRING:
		snprintf(buf, sizeof(buf), "string '%s'", token->string.data);
		break;
	default:
		return lex_token_str(token->type);
	}

	return buf;
}

const char *
lex_token_str(enum lex_token tok)
{
	switch (tok) {
	case T_EOF:
		return "EOF";
	case T_ERROR:
		return "ERROR";
	default:
		assert(tok < sizeof(tokens) / sizeof(tokens[0]));
		return tokens[tok];
	}
}

M src/ninja.c => src/ninja.c +0 -1
@@ 1,5 1,4 @@
#include "ninja.h"
#include "parser.h"
#include "log.h"
#include "interpreter.h"
#include "options.h"

M src/parser.c => src/parser.c +232 -298
@@ 1,8 1,4 @@
#include "parser.h"
#include "ast.h"
#include "lexer.h"
#include "token.h"
#include "log.h"
#define _XOPEN_SOURCE 700

#include <assert.h>
#include <stdio.h>


@@ 10,40 6,25 @@
#include <stdbool.h>
#include <string.h>

#include "ast.h"
#include "lexer.h"
#include "log.h"
#include "util.h"

#define PATH_MAX 4096

struct parser
{
	struct lexer lexer;
	struct token *cur;
	struct token *last;
};

static bool
accept(struct parser *parser, enum token_type type) {
	if (parser->cur->type == type) {
		free(parser->last);
		parser->last = parser->cur;
		parser->cur = lexer_tokenize(&parser->lexer);
		return true;
	}
	return false;
}
static struct ast_expression *parse_expression(struct lexer *);

static void
expect(struct parser *parser, enum token_type type)
unexpected(struct token *token)
{
	if (!accept(parser, type)) {
		fatal("expected %s, got %s", token_type_to_string(type),
				token_to_string(parser->cur));
	}
	fatal("Unexpected token %s at %s:%d:%d", token_str(token),
			token->loc.path, token->loc.lineno, token->loc.colno);
}

struct ast_expression *parse_expression(struct parser *);

void
static void
expression_list_appened(struct ast_expression_list *list,
		struct ast_expression *expression)
		struct ast_expression *expr)
{
	if (list == NULL) {
		fatal("cannot appened expression to empty list");


@@ 51,9 32,9 @@ expression_list_appened(struct ast_expression_list *list,

	const size_t new_size = list->n + 1;

	list->expressions = realloc(list->expressions,
	list->values = xrealloc(list->values,
			new_size * sizeof(struct ast_expression));
	list->expressions[list->n] = expression;
	list->values[list->n] = expr;
	list->n = new_size;
}



@@ 67,358 48,311 @@ keyword_list_appened(struct ast_keyword_list *list,

	const size_t new_size = list->n + 1;

	list->keys = realloc(list->keys,
	list->keys = xrealloc(list->keys,
			new_size * sizeof(struct ast_identifier));
	list->values = realloc(list->values,
	list->values = xrealloc(list->values,
			new_size * sizeof(struct ast_expression));
	list->keys[list->n] = key;
	list->values[list->n] = value;
	list->n = new_size;
}

struct ast_identifier *
parse_identifier(struct parser *parser)
static struct ast_identifier *
parse_identifier(struct token *token)
{
	struct ast_identifier *identifier = calloc(1,
			sizeof(struct ast_identifier));
	if (!identifier) {
		fatal("failed to allocate identifier node");
	}
	assert(token->type == T_IDENTIFIER);

	identifier->data = calloc(parser->last->n + 1, sizeof(char));
	strncpy(identifier->data, parser->last->data, parser->last->n);
	identifier->n = parser->last->n;
	struct ast_identifier *id = xcalloc(1, sizeof(struct ast_identifier));

	return identifier;
}
	id->n = token->string.n;
	id->data = strdup(token->string.data);

struct ast_string *
parse_string(struct parser *parser)
{
	struct ast_string *string = calloc(1, sizeof(struct ast_string));
	if (!string) {
		fatal("failed to allocate string node");
	}
	token_finish(token);

	string->data = calloc(parser->last->n + 1, sizeof(char));
	strncpy(string->data, parser->last->data, parser->last->n);
	string->n = parser->last->n;

	return string;
	return id;
}

/*
 * An array is a list holding an arbitrary number of values of any type.
 * It is delimited by brackets, and its elements are separated by commas.
 *
 * arr = [1, 2, 3, 'soleil']
 */
struct ast_expression_list *
parse_array(struct parser *parser)
static struct ast_string *
parse_string(struct token *token)
{
	struct ast_expression_list *list = calloc(1,
			sizeof(struct ast_expression_list));
	if (!list) {
		fatal("failed to allocate array");
	}

	for (;;) {
		while (accept(parser, TOKEN_EOL));

		if (accept(parser, TOKEN_RBRACK)) {
			break;
		}
	assert(token->type == T_STRING);

		expression_list_appened(list, parse_expression(parser));
	struct ast_string *str = xcalloc(1, sizeof(struct ast_string));

		if (accept(parser, TOKEN_RBRACK)) {
			break;
		}
	str->n = token->string.n;
	str->data = strdup(token->string.data);

		expect(parser, TOKEN_COMMA);
	}
	token_finish(token);

	return list;
	return str;
}

struct ast_bool *
parse_bool(struct parser *parser)
static struct ast_expression *parse_primary(struct lexer *);

static struct ast_expression_list *
parse_array(struct lexer *lexer)
{
	struct ast_bool *boolean = calloc(1, sizeof(struct ast_bool));
	assert(boolean);
	struct ast_expression_list *arr = xcalloc(1,
			sizeof(struct ast_expression_list));

	if (parser->last->type == TOKEN_TRUE) {
		boolean->value = true;
	} else if (parser->last->type == TOKEN_FALSE) {
		boolean->value = false;
	struct token token = {0};
	while (lex(lexer, &token) != T_RBRACK) {
		unlex(lexer, &token);

		struct ast_expression *expr = parse_primary(lexer);

		if (lex(lexer, &token) == T_COMMA) {
			expression_list_appened(arr, expr);
		} else {
			unexpected(&token);
		}
	}

	return boolean;
	return arr;
}

struct ast_expression *
parse_primary(struct parser *parser)
static struct ast_expression *
parse_primary(struct lexer *lexer)
{
	struct ast_expression *expression = calloc(1,
			sizeof(struct ast_expression));
	if (!expression) {
		fatal("failed to allocate expression node");
	struct ast_expression *expr = xcalloc(1, sizeof(struct ast_expression));

	struct token token = {0};
	switch (lex(lexer, &token)) {
	case T_IDENTIFIER:
		expr->identifier = parse_identifier(&token);
		expr->type = AST_IDENTIFIER;
		break;
	case T_STRING:
		expr->string = parse_string(&token);
		expr->type = AST_STRING;
		break;
	case T_LBRACK:
		expr->array = parse_array(lexer);
		expr->type = AST_ARRAY;
		break;
	default:
		unexpected(&token);
		break;
	}

	if (accept(parser, TOKEN_IDENTIFIER)) {
		expression->type = EXPRESSION_IDENTIFIER;
		expression->data.identifier = parse_identifier(parser);
	} else if (accept(parser, TOKEN_STRING)) {
		expression->type = EXPRESSION_STRING;
		expression->data.string = parse_string(parser);
	} else if (accept(parser, TOKEN_LBRACK)) {
		expression->type = EXPRESSION_ARRAY;
		expression->data.array = parse_array(parser);
	} else if (accept(parser, TOKEN_TRUE) || accept(parser, TOKEN_FALSE)) {
		expression->type = EXPRESSION_BOOL;
		expression->data.boolean = parse_bool(parser);
	} else {
		fatal("unexpected token %s", token_to_string(parser->cur));
	}

	return expression;
	return expr;
}

struct ast_arguments *
parse_arguments(struct parser *parser)
static struct ast_arguments *
parse_arguments(struct lexer *lexer)
{
	struct ast_arguments *arguments = calloc(1,
			sizeof(struct ast_arguments));
	assert(arguments);
	struct ast_arguments *args = xcalloc(1, sizeof(struct ast_arguments));
	args->position = xcalloc(1, sizeof(struct ast_expression_list));
	args->keyword = xcalloc(1, sizeof(struct ast_keyword_list));

	arguments->args = calloc(1, sizeof(struct ast_expression_list));
	assert(arguments->args);
	arguments->kwargs = calloc(1, sizeof(struct ast_keyword_list));
	assert(arguments->kwargs);
	struct token token = {0};
	while (lex(lexer, &token) != T_RPAREN) {
		unlex(lexer, &token);

	for (;;) {
		while (accept(parser, TOKEN_EOL));
		struct ast_expression *expr = parse_expression(lexer);

		if (accept(parser, TOKEN_RPAREN)) {
		switch (lex(lexer, &token)) {
		case T_COMMA:
			expression_list_appened(args->position, expr);
			break;
		}
		case T_COLON:
			if (expr->type != AST_IDENTIFIER) {
				fatal("keyword must be an identifier");
			}
			struct ast_expression *value = parse_expression(lexer);

		struct ast_expression *expression = parse_expression(parser);
		if (accept(parser, TOKEN_COLON)) {
			if (expression->type != EXPRESSION_IDENTIFIER) {
				fatal("kwarg key must be an identifier");
			keyword_list_appened(args->keyword, expr->identifier,
					value);

			free(expr);
			if (lex(lexer, &token) != T_COMMA) {
				unexpected(&token);
			}
			keyword_list_appened(arguments->kwargs,
					expression->data.identifier,
					parse_expression(parser));
		} else {
			expression_list_appened(arguments->args, expression);
		}

		if (accept(parser, TOKEN_RPAREN)) {
			break;
		case T_RPAREN:
			expression_list_appened(args->position, expr);
			goto argument_out;
			break;
		default:
			unexpected(&token);
			break;
		}

		expect(parser, TOKEN_COMMA);
	}

	return arguments;
argument_out:
	return args;
}

static struct ast_expression *
parse_function(struct parser *parser, struct ast_expression *left)
static struct ast_function *
parse_function(struct lexer *lexer, struct ast_expression *expr)
{
	if (left->type != EXPRESSION_IDENTIFIER) {
		fatal("function should be an identifier");
	if (expr->type != AST_IDENTIFIER) {
		fatal("function should be called on an identifier");
	}

	struct ast_expression *expression = calloc(1,
			sizeof(struct ast_expression));
	assert(expression);

	expression->type = EXPRESSION_FUNCTION;
	expression->data.function = calloc(1, sizeof(struct ast_function));
	assert(expression->data.function);
	struct ast_function *func = xcalloc(1, sizeof(struct ast_function));

	expression->data.function->left = left->data.identifier;
	expression->data.function->right = parse_arguments(parser);
	func->left = expr->identifier;
	func->right = parse_arguments(lexer);

	return expression;
	return func;
}

struct ast_expression *
parse_method(struct parser *parser, struct ast_expression *left)
static struct ast_method *
parse_method(struct lexer *lexer, struct ast_expression *expr)
{
	assert(left);
	if (left->type != EXPRESSION_IDENTIFIER
			&& left->type != EXPRESSION_STRING) {
		fatal("method must be called on an identifier or a string");
	struct ast_method *method = xcalloc(1, sizeof(struct ast_method));
	struct ast_expression *left = xcalloc(1, sizeof(struct ast_expression));

	left->type = expr->type;
	switch (expr->type) {
	case AST_STRING:
		left->string = expr->string;
		break;
	case AST_IDENTIFIER:
		left->identifier = expr->identifier;
		break;
	default:
		fatal("Syntax error");
		break;
	}

	struct ast_expression *expression = calloc(1,
			sizeof(struct ast_expression));
	assert(expression);

	expression->type = EXPRESSION_METHOD;
	expression->data.method = calloc(1, sizeof(struct ast_method));
	assert(expression->data.method);

	expression->data.method->left = left;
	assert(expression->data.method->left);
	method->left = left;

	struct ast_expression *right = parse_expression(parser);
	if (right->type != EXPRESSION_FUNCTION) {
	struct ast_expression *func = parse_expression(lexer);
	if (func->type != AST_FUNCTION) {
		fatal("method right side must be a function");
	}

	expression->data.method->right = right->data.function;
	method->right = func->function;
	free(func);

	return expression;
	return method;
}

/*
 * Parse a primary expression followed by an optional postfix part.
 *
 * When the primary is followed by TOKEN_LPAREN the result comes from
 * parse_function(); when it is followed by TOKEN_DOT the result comes from
 * parse_method(). Otherwise the primary expression is returned as is.
 */
struct ast_expression *
parse_postfix(struct parser *parser)
{
	struct ast_expression *primary = parse_primary(parser);

	/* TOKEN_LPAREN is tried first, TOKEN_DOT only if that did not match. */
	if (accept(parser, TOKEN_LPAREN)) {
		return parse_function(parser, primary);
	}

	if (accept(parser, TOKEN_DOT)) {
		return parse_method(parser, primary);
	}

	return primary;
}

/*
 * Tell whether the parser's current token is one of the assignment
 * operators. The token is only inspected, never consumed.
 */
bool
is_assignment_op(struct parser *parser)
{
	switch (parser->cur->type) {
	case TOKEN_ASSIGN:
	case TOKEN_STAREQ:
	case TOKEN_SLASHEQ:
	case TOKEN_MODEQ:
	case TOKEN_PLUSEQ:
	case TOKEN_MINEQ:
		return true;
	default:
		return false;
	}
}

enum ast_assignment_op
parse_assignment_op(struct parser *parser)
static struct ast_expression *
parse_postfix(struct lexer *lexer)
{
	if (accept(parser, TOKEN_ASSIGN)) {
		return ASSIGNMENT_ASSIGN;
	} else if (accept(parser, TOKEN_STAREQ)) {
		return ASSIGNMENT_STAREQ;
	} else if (accept(parser, TOKEN_SLASHEQ)) {
		return ASSIGNMENT_SLASHEQ;
	} else if (accept(parser, TOKEN_MODEQ)) {
		return ASSIGNMENT_MODEQ;
	} else if (accept(parser, TOKEN_PLUSEQ)) {
		return ASSIGNMENT_PLUSEQ;
	} else if (accept(parser, TOKEN_MINEQ)) {
		return ASSIGNMENT_MINEQ;
	} else {
		fatal("%s is not an assignment operation",
				token_to_string(parser->cur));
	struct ast_expression *expr = parse_primary(lexer);

	struct token token = {0};
	switch (lex(lexer, &token)) {
	case T_LPAREN:
		expr->function = parse_function(lexer, expr);
		expr->type = AST_FUNCTION;
		break;
	case T_DOT:
		expr->method = parse_method(lexer, expr);
		expr->type = AST_METHOD;
		break;
	case T_IDENTIFIER:
		expr->identifier = parse_identifier(&token);
		expr->type = AST_IDENTIFIER;
		break;
	default:
		unlex(lexer, &token);
		break;
	}

	return -1;
	return expr;
}

struct ast_expression *
parse_assignment(struct parser *parser, struct ast_expression *left)
static struct ast_assignment *
parse_assignment(struct lexer *lexer, struct ast_expression *left)
{
	if (left->type != EXPRESSION_IDENTIFIER) {
	if (left->type != AST_IDENTIFIER) {
		fatal("assignment target must be an identifier");
	}

	struct ast_expression *expression = calloc(1,
			sizeof(struct ast_expression));
	assert(expression);

	expression->type = EXPRESSION_ASSIGNMENT;
	expression->data.assignment = calloc(1, sizeof(struct ast_assignment));
	assert(expression->data.assignment);
	struct ast_assignment *ass = xcalloc(1, sizeof(struct ast_assignment));

	ass->left = left->identifier;

	struct token token = {0};
	switch (lex(lexer, &token)) {
	case T_ASSIGN:
		ass->op = ASSIGNMENT_ASSIGN;
		break;
	case T_STAREQ:
		ass->op = ASSIGNMENT_STAREQ;
		break;
	case T_SLASHEQ:
		ass->op = ASSIGNMENT_SLASHEQ;
		break;
	case T_MODEQ:
		ass->op = ASSIGNMENT_MODEQ;
		break;
	case T_PLUSEQ:
		ass->op = ASSIGNMENT_PLUSEQ;
		break;
	case T_MINEQ:
		ass->op = ASSIGNMENT_MINEQ;
		break;
	default:
		unexpected(&token);
	}

	expression->data.assignment->left = left->data.identifier;
	expression->data.assignment->op = parse_assignment_op(parser);
	expression->data.assignment->right = parse_expression(parser);
	ass->right = parse_expression(lexer);

	return expression;
	return ass;
}

struct ast_expression *
parse_expression(struct parser *parser)
static struct ast_expression *
parse_expression(struct lexer *lexer)
{
	//struct ast_expression *left = parse_or(parser);
	struct ast_expression *left = parse_postfix(parser);
	if (is_assignment_op(parser)) {
		return parse_assignment(parser, left);
	} else if (accept(parser, TOKEN_QM)) {
		fatal("todo condition expression");
	struct ast_expression *expr = parse_postfix(lexer);

	struct token token = {0};
	switch (lex(lexer, &token)) {
	case T_ASSIGN:
	case T_STAREQ:
	case T_SLASHEQ:
	case T_MODEQ:
	case T_PLUSEQ:
	case T_MINEQ:
		unlex(lexer, &token);
		expr->assignment = parse_assignment(lexer, expr);
		expr->type = AST_ASSIGNMENT;
		break;
	default:
		unlex(lexer, &token);
		break;
	}

	return left;
	return expr;
}

struct ast_statement *
parse_statement(struct parser *parser)
parse(struct lexer *lexer)
{
	struct ast_statement *statement = calloc(1,
			sizeof(struct ast_statement));
	if (!statement) {
		fatal("failed to allocate statement node");
	}
	struct ast_statement *root = NULL;

	while (accept(parser, TOKEN_EOL));

	if (accept(parser, TOKEN_EOF)) {
		free(statement);
		return NULL;
	} else if (accept(parser, TOKEN_FOREACH)) {
		statement->type = STATEMENT_ITERATION;
		fatal("TODO iteration statement");
	} else if (accept(parser, TOKEN_IF)) {
		statement->type = STATEMENT_SELECTION;
		fatal("TODO selection statement");
	} else {
		statement->type = STATEMENT_EXPRESSION;
		statement->data.expression = parse_expression(parser);
	}

	return statement;
}
	struct token token = {0};
	while (lex(lexer, &token) != T_EOF) {
		struct ast_statement *statement = xcalloc(1,
				sizeof(struct ast_statement));

struct ast_root
parse(const char *source_dir)
{
	info("Source dir: %s", source_dir);

	char source_path[PATH_MAX] = {0};
	snprintf(source_path, sizeof(source_path), "%s/%s", source_dir,
			"meson.build");

	struct parser parser = {0};
	lexer_init(&parser.lexer, source_path);
	parser.cur = lexer_tokenize(&parser.lexer);
		switch (token.type) {
		case T_FOREACH:
			fatal("TODO iteration statement");
			break;
		case T_IF:
			fatal("TODO selection statement");
			break;
		default:
			unlex(lexer, &token);
			statement->expression = parse_expression(lexer);
			statement->type = STATEMENT_EXPRESSION;
			break;
		}

	struct ast_root root = { 0 };
	while (parser.cur->type != TOKEN_EOF) {
		root.statements = realloc(root.statements,
				++root.n * sizeof(struct ast_statement *));
		root.statements[root.n - 1] = parse_statement(&parser);
		struct ast_statement **curr = &root;
		while (*curr) {
			curr = &(*curr)->next;
		}
		*curr = statement;
	}

	lexer_finish(&parser.lexer);

	return root;
}

M src/setup.c => src/setup.c +63 -15
@@ 1,23 1,44 @@
#define _XOPEN_SOURCE 700

#include "setup.h"
#include "log.h"
#include "getopt_long.h"
#include "parser.h"
#include "interpreter.h"
#include "ninja.h"

#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <errno.h>
#include "getopt_long.h"
#include "interpreter.h"
#include "log.h"
#include "lexer.h"
#include "ninja.h"
#include "setup.h"
#include "util.h"

#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
/* Stage selectable through the BOSON_STAGE environment variable. */
enum stage {
	STAGE_LEX,
	STAGE_PARSE,
	STAGE_EVAL,
	STAGE_EMIT
};

/*
 * Read the requested stage from the BOSON_STAGE environment variable.
 *
 * An unset variable selects STAGE_EMIT. The values "EVAL", "PARSE" and "LEX"
 * select the matching stage; any other value is reported through fatal().
 */
static enum stage
get_stage(void)
{
	static const struct {
		const char *name;
		enum stage stage;
	} known[] = {
		{ "EVAL", STAGE_EVAL },
		{ "PARSE", STAGE_PARSE },
		{ "LEX", STAGE_LEX },
	};

	const char *requested = getenv("BOSON_STAGE");
	if (requested == NULL) {
		return STAGE_EMIT;
	}

	for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
		if (strcmp(requested, known[i].name) == 0) {
			return known[i].stage;
		}
	}

	fatal("unknown BOSON_STAGE value '%s'", requested);

	/* Keeps every path returning a value should fatal() come back. */
	return STAGE_EMIT;
}

static int
setup_usage(void)


@@ 80,7 101,7 @@ setup(int argc, char **argv)
		source_dir = ".";
	}

	char *cwd = calloc(PATH_MAX, sizeof(char));
	char *cwd = xcalloc(PATH_MAX, sizeof(char));
	getcwd(cwd, PATH_MAX);

	char abs_source_dir[PATH_MAX] = {0}, abs_build_dir[PATH_MAX] = {0};


@@ 88,11 109,38 @@ setup(int argc, char **argv)
	snprintf(abs_build_dir, PATH_MAX, "%s/%s", cwd, build_dir);
	free(cwd);

	info("Version: " VERSION);
	info("Source dir: %s", source_dir);

	char source_path[PATH_MAX] = {0};
	snprintf(source_path, sizeof(source_path), "%s/%s", source_dir,
			"meson.build");

	enum stage stage = get_stage();

	struct lexer lexer = {0};
	lexer_init(&lexer, source_path);

	if (stage == STAGE_LEX) {
		info("Printing out tokens");
		struct token token = {0};
		while (lex(&lexer, &token) != T_EOF) {
			info("token: %s", token_str(&token));
			token_finish(&token);
		}
		lexer_finish(&lexer);
		return 0;
	}

	struct ast_statement *root = parse(&lexer);
	lexer_finish(&lexer);

	if (stage == STAGE_PARSE) {
		fatal("TODO print all statements");
	}

	struct ast_root root = parse(abs_source_dir);
	struct context ctx = interpret_ast(root);

	struct context ctx = interpret_ast(&root);
	// TODO free statements

	// TODO free ctx
	return emit_ninja(&ctx, abs_build_dir);

D src/token.c => src/token.c +0 -83
@@ 1,83 0,0 @@
#include "token.h"
#include "log.h"

#include <stdlib.h>

/*
 * Release a heap-allocated token together with its data buffer.
 * A NULL token is reported through report() and otherwise ignored.
 */
void
token_destroy(struct token *token) {
	if (token != NULL) {
		/* free() accepts NULL, so the data pointer needs no check. */
		free(token->data);
		free(token);
		return;
	}

	report("attemting to destroy null token");
}

const char *
token_type_to_string(enum token_type type)
{
#define TOKEN_TRANSLATE(e) case e: return #e;
	switch (type) {
	TOKEN_TRANSLATE(TOKEN_EOF);
	TOKEN_TRANSLATE(TOKEN_EOL);
	TOKEN_TRANSLATE(TOKEN_LPAREN);
	TOKEN_TRANSLATE(TOKEN_RPAREN);
	TOKEN_TRANSLATE(TOKEN_LBRACK);
	TOKEN_TRANSLATE(TOKEN_RBRACK);
	TOKEN_TRANSLATE(TOKEN_LCURL);
	TOKEN_TRANSLATE(TOKEN_RCURL);
	TOKEN_TRANSLATE(TOKEN_DOT);
	TOKEN_TRANSLATE(TOKEN_COMMA);
	TOKEN_TRANSLATE(TOKEN_COLON);
	TOKEN_TRANSLATE(TOKEN_ASSIGN);
	TOKEN_TRANSLATE(TOKEN_PLUS);
	TOKEN_TRANSLATE(TOKEN_MINUS);
	TOKEN_TRANSLATE(TOKEN_STAR);
	TOKEN_TRANSLATE(TOKEN_SLASH);
	TOKEN_TRANSLATE(TOKEN_MODULO);
	TOKEN_TRANSLATE(TOKEN_PLUSEQ);
	TOKEN_TRANSLATE(TOKEN_MINEQ);
	TOKEN_TRANSLATE(TOKEN_STAREQ);
	TOKEN_TRANSLATE(TOKEN_SLASHEQ);
	TOKEN_TRANSLATE(TOKEN_MODEQ);
	TOKEN_TRANSLATE(TOKEN_EQ);
	TOKEN_TRANSLATE(TOKEN_NEQ);
	TOKEN_TRANSLATE(TOKEN_GT);
	TOKEN_TRANSLATE(TOKEN_GEQ);
	TOKEN_TRANSLATE(TOKEN_LT);
	TOKEN_TRANSLATE(TOKEN_LEQ);
	TOKEN_TRANSLATE(TOKEN_TRUE);
	TOKEN_TRANSLATE(TOKEN_FALSE);
	TOKEN_TRANSLATE(TOKEN_IF);
	TOKEN_TRANSLATE(TOKEN_ELSE);
	TOKEN_TRANSLATE(TOKEN_ELIF);
	TOKEN_TRANSLATE(TOKEN_ENDIF);
	TOKEN_TRANSLATE(TOKEN_AND);
	TOKEN_TRANSLATE(TOKEN_OR);
	TOKEN_TRANSLATE(TOKEN_NOT);
	TOKEN_TRANSLATE(TOKEN_QM);
	TOKEN_TRANSLATE(TOKEN_FOREACH);
	TOKEN_TRANSLATE(TOKEN_ENDFOREACH);
	TOKEN_TRANSLATE(TOKEN_IN);
	TOKEN_TRANSLATE(TOKEN_CONTINUE);
	TOKEN_TRANSLATE(TOKEN_BREAK);
	TOKEN_TRANSLATE(TOKEN_IDENTIFIER);
	TOKEN_TRANSLATE(TOKEN_STRING);
	TOKEN_TRANSLATE(TOKEN_NUMBER);
	default:
		report("unknown token");
		break;
	}
#undef TOKEN_TRANSLATE
	return "";
}

const char *
token_to_string(struct token *token)
{
	return token_type_to_string(token->type);
}

A src/util.c => src/util.c +28 -0
@@ 0,0 1,28 @@
#include "log.h"
#include "util.h"

#undef malloc
#undef calloc
#undef realloc

#include <stdlib.h>

/*
 * Checked calloc(): allocate a zero-initialised array of n elements of
 * size bytes each, reporting through fatal() when the allocation fails.
 *
 * calloc() is allowed to return NULL for a zero-sized request, which would be
 * misreported as an allocation failure. Such a request is therefore rounded
 * up to a single byte so the caller always gets a valid pointer that can be
 * handed to free() or xrealloc().
 */
void *
xcalloc(size_t n, size_t size)
{
	if (n == 0 || size == 0) {
		n = 1;
		size = 1;
	}

	void *data = calloc(n, size);
	if (!data) {
		fatal("failed to allocate");
	}
	return data;
}

/*
 * Checked realloc(): resize the allocation at ptr to size bytes, reporting
 * through fatal() when the reallocation fails. As with realloc(), a NULL ptr
 * makes this behave like a fresh allocation.
 *
 * The result of realloc(ptr, 0) is implementation-defined: it may free ptr
 * and return NULL, which would be misreported as a failure here. A zero size
 * is therefore rounded up to a single byte so the call always yields a valid
 * pointer.
 */
void *
xrealloc(void *ptr, size_t size)
{
	if (size == 0) {
		size = 1;
	}

	void *tmp = realloc(ptr, size);
	if (!tmp) {
		fatal("failed to reallocate");
	}
	return tmp;
}