~oriansj/M3-Preprocess

74630d0b78d805ae1c24914085f727dc3bcaf1c5 — Jeremiah Orians 2 years ago bc17933
Wire it up to do just enough to be considered a start
8 files changed, 1545 insertions(+), 59 deletions(-)

M c_stage1.c
M cc.h
A cc_macro.c
M cc_reader.c
M cpp.c
A dump.c
A include_expansion.c
A makefile
M c_stage1.c => c_stage1.c +108 -49
@@ 23,8 23,8 @@
int strtoint(char *a);
struct token_list* new_token(char* s, int size, char* filename, int linenum, struct token_list* prev, struct token_list* next);
struct token_list* reverse_list(struct token_list* head);
void dump_stage_raw();
void dump_stage_straight();
void dump_stage_straight(struct token_list* token);
void dump_stage_raw(struct token_list* token);

struct pattern* new_pattern(char* s, struct pattern* next)
{


@@ 56,9 56,9 @@ char* get_new_string(char* s)
	return r;
}

void replace_trigraphs()
void replace_trigraphs(struct token_list* token)
{
	struct token_list* i = global_token;
	struct token_list* i = token;
	struct pattern* trigraphhash = new_pattern("?", new_pattern("?", new_pattern("=", NULL)));
	struct pattern* trigraphopenbracket = new_pattern("?", new_pattern("?", new_pattern("(", NULL)));
	struct pattern* trigraphbackslash = new_pattern("?", new_pattern("?", new_pattern("/", NULL)));


@@ 85,10 85,89 @@ void replace_trigraphs()
	}
}

void combine_common()
/*
 * Merge a "#" token with the directive keyword that follows it.
 *
 * token:       token whose successor is compared against replace
 * replace:     keyword text expected in token->next (e.g. "define")
 * replacement: text stored in token when the keyword matches (e.g. "#define")
 *
 * When token->next does not hold replace, nothing is changed.  On a match the
 * following token is unlinked and freed, token's old string is freed, and
 * token->s is pointed at replacement itself (the pointer is stored, no copy
 * is made).  Any NULL input aborts through require().
 */
void collapse_keyword(struct token_list* token, char* replace, char* replacement)
{
	require(NULL != token, "passed incorrect token in collapse_keyword\n");
	require(NULL != token->next, "# EOF is not a valid pattern\n");
	require(NULL != replace, "We can't replace a null string\n");
	require(NULL != replacement, "The replacement can't be null either\n");

	struct token_list* keyword = token->next;
	if(!match(keyword->s, replace)) return;

	/* Swap in the combined spelling */
	free(token->s);
	token->s = replacement;

	/* Unlink the keyword token and release it */
	token->next = keyword->next;
	free(keyword->s);
	free(keyword);
}

/* Because the reader doesn't understand trigraphs, we have to fixup some keywords */
void fixup_trigraph_garbage(struct token_list* token)
{
	struct token_list* i = token;
	struct token_list* hold;
	while(NULL != i)
	{
		if(NULL == i->next) break;
		if(match("#", i->s))
		{
			while(match(" ", i->next->s) || match("\t", i->next->s) || match("\n", i->next->s))
			{
				hold = i->next;
				i->next = i->next->next;
				free(hold->s);
				free(hold);
				if(NULL == i->next) break;
			}
			collapse_keyword(i, "if", "#if");
			collapse_keyword(i, "ifdef", "#ifdef");
			collapse_keyword(i, "ifndef", "#ifndef");
			collapse_keyword(i, "else", "#else");
			collapse_keyword(i, "define", "#define");
			collapse_keyword(i, "include", "#include");
			collapse_keyword(i, "undef", "#undef");
			collapse_keyword(i, "line", "#line");
			collapse_keyword(i, "pragma", "#pragma");
		}
		else i = i->next;
	}
}

/*
 * Fold the tokens of an #include <name> operand into a single token.
 *
 * token is the first token of the operand (combine_common passes the "<").
 * The strings of token and of every following token up to and including the
 * first ">" are concatenated in hold_string; token->s is replaced by a fresh
 * copy of that text and the tokens after token, through the ">", are
 * unlinked and freed.  Reaching the end of the list before a ">" aborts
 * through require().
 */
void collapse_library_name(struct token_list* token)
{
	struct token_list* cursor = token;
	struct token_list* doomed;

	/* Collect the text of every piece, finishing with the ">" itself */
	reset_hold_string();
	while(!match(">", cursor->s))
	{
		strcat(hold_string, cursor->s);
		require(NULL != cursor->next, "incomplete #include < block>\n");
		cursor = cursor->next;
	}
	strcat(hold_string, cursor->s);

	/* Free up old string */
	free(token->s);
	token->s = calloc(strlen(hold_string)+1, sizeof(char));
	strcpy(token->s, hold_string);

	/* cursor rests on the ">"; splice everything up to it out of the list */
	doomed = token->next;
	token->next = cursor->next;

	/* Clean up the bits */
	while(!match(">", doomed->s))
	{
		cursor = doomed->next;
		free(doomed->s);
		free(doomed);
		doomed = cursor;
	}
	free(doomed->s);
	free(doomed);
}


void combine_common(struct token_list* token)
{
	struct token_list* i = token;

	struct pattern* decrement = new_pattern("-", new_pattern("-", NULL));
	struct pattern* increment = new_pattern("+", new_pattern("+", NULL));


@@ 111,6 190,7 @@ void combine_common()
	struct pattern* xorassign = new_pattern("^", new_pattern("=", NULL));
	struct pattern* leftassign = new_pattern("<", new_pattern("<", new_pattern("=", NULL)));
	struct pattern* rightassign = new_pattern(">", new_pattern(">", new_pattern("=", NULL)));
	struct pattern* doublebang = new_pattern("#", new_pattern("#", NULL));

	while(NULL != i)
	{


@@ 119,41 199,13 @@ void combine_common()
		if(match(i->s, "#include"))
		{
			i = i->next;
			while(match(" ", i->s))
			while(match(" ", i->s) || match("\t", i->s))
			{
				i = i->next;
			}
			if(match("<", i->s))
			{
				reset_hold_string();
				strcat(hold_string, "<");
				require(NULL != i->next, "incomplete #include < block>\n");
				strcat(hold_string, i->next->s);
				require(NULL != i->next->next, "incomplete #include <block>\n");
				require(match(i->next->next->s, "."), "not a proper #include <name>\n");
				strcat(hold_string, ".");
				require(NULL != i->next->next->next, "incomplete #include <block>\n");
				strcat(hold_string, i->next->next->next->s);
				require(NULL != i->next->next->next->next, "incomplete #include <block>\n");
				require(match(i->next->next->next->next->s, ">"), "not a properly terminated #include <name>\n");
				strcat(hold_string, ">");
				/* Free up old string */
				free(i->s);
				i->s = calloc(strlen(hold_string)+1, sizeof(char));
				strcpy(i->s, hold_string);
				hold = i->next;
				i->next = i->next->next->next->next->next;

				/* Clean up the bits */
				free(hold->next->next->next->s);
				free(hold->next->next->next);
				free(hold->next->next->s);
				free(hold->next->next);
				free(hold->next->s);
				free(hold->next);
				free(hold->s);
				free(hold);

				collapse_library_name(i);
			}
			else
			{


@@ 181,22 233,29 @@ void combine_common()
		else if(pattern_compress(i, shiftleft)) i->s = "<<";
		else if(pattern_compress(i, shiftright)) i->s = ">>";
		else if(pattern_compress(i, arrow)) i->s = "->";
		else if(pattern_compress(i,doublebang)) i->s = "##";
		else i = i->next;
	}
}

void stage1_preprocess()

struct token_list* stage1_preprocess(struct token_list* token, int outer_stage)
{
	if(1 == DUMP_STAGE) dump_stage_raw();

	global_token = reverse_list(global_token);

	if(2 == DUMP_STAGE) dump_stage_raw();
	if(3 == DUMP_STAGE) dump_stage_straight();
	replace_trigraphs();
	if(4 == DUMP_STAGE) dump_stage_raw();
	if(5 == DUMP_STAGE) dump_stage_straight();
	combine_common();
	if(6 == DUMP_STAGE) dump_stage_raw();
	if(7 == DUMP_STAGE) dump_stage_straight();
	if((outer_stage+1) == DUMP_STAGE) dump_stage_raw(token);
	if((outer_stage+2) == DUMP_STAGE) dump_stage_straight(token);

	token = reverse_list(token);

	if((outer_stage+3) == DUMP_STAGE) dump_stage_raw(token);
	if((outer_stage+4) == DUMP_STAGE) dump_stage_straight(token);
	replace_trigraphs(token);
	if((outer_stage+5) == DUMP_STAGE) dump_stage_raw(token);
	if((outer_stage+6) == DUMP_STAGE) dump_stage_straight(token);
	fixup_trigraph_garbage(token);
	if((outer_stage+7) == DUMP_STAGE) dump_stage_raw(token);
	if((outer_stage+8) == DUMP_STAGE) dump_stage_straight(token);
	combine_common(token);
	if((outer_stage+9) == DUMP_STAGE) dump_stage_raw(token);
	if((outer_stage+10) == DUMP_STAGE) dump_stage_straight(token);
	return token;
}

M cc.h => cc.h +1 -1
@@ 33,7 33,6 @@ void require(int bool, char* error);
char* int2str(int x, int base, int signed_p);
void reset_hold_string();


struct type
{
	struct type* next;


@@ 76,4 75,5 @@ struct pattern
};

#include "cc_globals.h"
void line_error_token(struct token_list *token);
#endif

A cc_macro.c => cc_macro.c +1039 -0
@@ 0,0 1,1039 @@
/* Copyright (C) 2021 Sanne Wouda
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * Copyright (C) 2022 Jan (janneke) Nieuwenhuizen <janneke@gnu.org>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "cc.h"

/* Helpers defined in other translation units */
void require(int bool, char* error);
int strtoint(char* a);
void line_error_token(struct token_list* list);
struct token_list* eat_token(struct token_list* head);
struct token_list* reverse_list(struct token_list* head);
void expand_includes(struct token_list* token, int depth);

/* Raised by 100 for every #include handled in macro_directive and passed
 * to expand_includes as its depth argument */
int recursive_expand;

/* One entry of the #if/#ifdef/#ifndef stack; pushed by the opening
 * directive and popped by #endif (see macro_directive) */
struct conditional_inclusion
{
	/* enclosing conditional, NULL for the outermost one */
	struct conditional_inclusion* prev;
	int include; /* 1 == include, 0 == skip */
	int previous_condition_matched; /* 1 == all subsequent conditions treated as FALSE */
};

/* One defined macro; entries form a singly linked list headed by macro_env */
struct macro_list
{
	/* next (older) definition, NULL at the end of the list */
	struct macro_list* next;
	/* macro name; the string pointer is taken over from the defining token */
	char* symbol;
	/* first token of the replacement text, NULL when the macro has no body */
	struct token_list* expansion;
	/* parameter names of a function-like macro, NULL for an object-like one */
	struct token_list* arguments;
};

/* head of the list of currently defined macros; newest definition first */
struct macro_list* macro_env;
/* innermost open conditional block, NULL outside of any #if */
struct conditional_inclusion* conditional_inclusion_top;

/* point where we are currently modifying the global_token list */
struct token_list* macro_token;

void init_macro_env(char* sym, char* value, char* source, int num)
{
	struct macro_list* hold = macro_env;
	macro_env = calloc(1, sizeof(struct macro_list));
	macro_env->symbol = sym;
	macro_env->next = hold;
	macro_env->expansion = calloc(1, sizeof(struct token_list));
	macro_env->expansion->s = value;
	macro_env->expansion->filename = source;
	macro_env->expansion->linenumber = num;
}

/*
 * Remove the token at macro_token from the list and move macro_token to
 * whatever eat_token returns for it.
 *
 * eat_whitespace: when non-zero, every directly following token whose text
 *                 starts with a space is removed as well.
 *
 * If macro_token was the same token as global_token on entry, global_token
 * is moved along with it.  macro_token may be NULL afterwards (callers
 * check for that), so the whitespace loop stops at the end of the list
 * instead of dereferencing a NULL token.
 */
void _eat_current_token(int eat_whitespace)
{
	int update_global_token = FALSE;
	if (macro_token == global_token)
		update_global_token = TRUE;

	macro_token = eat_token(macro_token);
	if(eat_whitespace)
	{
		while(NULL != macro_token)
		{
			if(' ' != macro_token->s[0]) break;
			macro_token = eat_token(macro_token);
		}
	}

	if(update_global_token)
		global_token = macro_token;
}

/* Drop the token at macro_token together with the space tokens after it */
void eat_current_token()
{
	_eat_current_token(TRUE);
}

/* Drop only the token at macro_token; a following space token is kept */
void eat_current_token_without_space()
{
	_eat_current_token(FALSE);
}

/*
 * Find the argument whose name equals token->s.
 *
 * token:     token to look up; NULL is a fatal error
 * arguments: list of argument entries, searched front to back
 *
 * Returns the ->expansion of the first entry with a matching name, or NULL
 * when no entry matches.
 */
struct token_list* lookup_token(struct token_list* token, struct token_list* arguments)
{
	struct token_list* candidate;

	if(NULL == token)
	{
		fputs("null token received in token\n", stderr);
		exit(EXIT_FAILURE);
	}

	for(candidate = arguments; NULL != candidate; candidate = candidate->next)
	{
		if(match(token->s, candidate->s)) return candidate->expansion;
	}

	/* no argument carries that name */
	return NULL;
}

/*
 * Copy every token of the list "token" and link the copies in front of
 * "point", preserving their order.
 *
 * Each copy shares the string, filename and line number of its original.
 * Returns the first copy made, or NULL when the list to insert is empty.
 * With a NULL point the copies only get a NULL ->next; nothing else is linked.
 */
struct token_list* insert_tokens(struct token_list* point, struct token_list* token)
{
	struct token_list* head = NULL;
	struct token_list* node;
	struct token_list* source;

	for(source = token; NULL != source; source = source->next)
	{
		node = calloc(1, sizeof(struct token_list));
		node->s = source->s;
		node->filename = source->filename;
		node->linenumber = source->linenumber;
		node->next = point;

		if(NULL == head) head = node;

		if(NULL != point)
		{
			/* squeeze the copy in between point and its old predecessor */
			node->prev = point->prev;
			if(NULL != node->prev) node->prev->next = node;
			point->prev = node;
		}
	}

	return head;
}

/*
 * Build a newly allocated copy of the list starting at token.
 *
 * Only the string pointer of each token is carried over (the text itself is
 * shared, filename and line number are left zeroed).  Returns the result of
 * reverse_list() on the chain built here, or NULL when token is NULL.
 */
struct token_list* copy_list(struct token_list* token)
{
	struct token_list* copy;
	struct token_list* prev = NULL;

	while (NULL != token)
	{
		copy = calloc(1, sizeof(struct token_list));
		copy->s = token->s;

		/* The chain is built back to front: ->next points at the node made
		 * just before this one and is left for reverse_list to fix up,
		 * while ->prev already names the predecessor in source order */
		copy->next = prev;
		copy->prev = prev;
		prev = copy;
		token = token->next;
	}

	/* Nothing was copied; there is no node to hand to reverse_list */
	if(NULL == prev) return NULL;

	return reverse_list(prev);
}

/*
 * Find the macro named by token->s in macro_env.
 *
 * A NULL token is a fatal error.  Returns the first (most recently added)
 * entry whose symbol matches, or NULL when the name is not defined.
 */
struct macro_list* lookup_macro(struct token_list* token)
{
	struct macro_list* entry;

	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("null token received in lookup_macro\n", stderr);
		exit(EXIT_FAILURE);
	}

	for(entry = macro_env; NULL != entry; entry = entry->next)
	{
		if(match(token->s, entry->symbol)) return entry;
	}

	/* the name is not defined */
	return NULL;
}

/*
 * Remove the first macro_env entry whose symbol equals token->s.
 *
 * A NULL token is a fatal error.  Undefining a name that is not defined,
 * including the case where no macro exists at all, is silently accepted.
 * Only the list entry is freed; its symbol and expansion are left alone.
 */
void remove_macro(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("received a null in remove_macro\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = macro_env;
	struct macro_list* temp;

	/* No macro has been defined yet, so there is nothing to undefine */
	if(NULL == hold) return;

	/* Deal with the first element */
	if (match(token->s, hold->symbol)) {
		macro_env = hold->next;
		free(hold);
		return;
	}

	/* Remove element from the middle of linked list */
	while (NULL != hold->next)
	{
		if (match(token->s, hold->next->symbol))
		{
			temp = hold->next;
			hold->next = hold->next->next;
			free(temp);
			return;
		}

		hold = hold->next;
	}

	/* nothing to undefine */
	return;
}

int macro_expression();
/*
 * Evaluate the identifier at macro_token inside a macro expression.
 *
 * An undefined name evaluates to 0.  A defined name evaluates to strtoint()
 * of the first token of its expansion; a defined macro without an expansion
 * is a fatal error.  The identifier is consumed in every non-fatal case.
 */
int macro_variable()
{
	struct macro_list* macro = lookup_macro(macro_token);
	int value;

	if(NULL == macro)
	{
		eat_current_token();
		return 0;
	}

	if(NULL == macro->expansion)
	{
		line_error_token(macro_token);
		fputs("hold->expansion is a null\n", stderr);
		exit(EXIT_FAILURE);
	}

	value = strtoint(macro->expansion->s);
	eat_current_token();
	return value;
}

/* Convert the numeric token at macro_token with strtoint(), consume the
 * token and return the converted value */
int macro_number()
{
	int value;

	value = strtoint(macro_token->s);
	eat_current_token();

	return value;
}

/*
 * Parse and evaluate one primary of a macro expression, starting at
 * macro_token:
 *
 *   -primary            arithmetic negation
 *   !primary            logical negation
 *   ( expression )      grouping; the closing parenthesis is required
 *   defined NAME        1 when NAME is a defined macro, else 0
 *   defined ( NAME )    same, parenthesised form
 *   identifier          see macro_variable
 *   number              see macro_number
 *
 * The tokens that make up the primary are consumed.  Any other token
 * evaluates to 0 and is left in place.
 */
int macro_primary_expr()
{
	int defined_has_paren = FALSE;
	int hold;
	require(NULL != macro_token, "got an EOF terminated macro primary expression\n");

	if('-' == macro_token->s[0])
	{
		eat_current_token();
		return -macro_primary_expr();
	}
	else if('!' == macro_token->s[0])
	{
		eat_current_token();
		return !macro_primary_expr();
	}
	else if('(' == macro_token->s[0])
	{
		eat_current_token();
		hold = macro_expression();

		/* The group has to be closed again; without consuming the ")" the
		 * rest of the expression could never be parsed */
		require(NULL != macro_token, "got an EOF terminated macro parenthesized expression\n");
		require(')' == macro_token->s[0], "missing close parenthesis in macro expression\n");
		eat_current_token();
		return hold;
	}
	else if(match("defined", macro_token->s))
	{
		eat_current_token();

		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");

		if('(' == macro_token->s[0])
		{
			defined_has_paren = TRUE;
			eat_current_token();
		}

		/* lookup_macro itself rejects a NULL token */
		if (NULL != lookup_macro(macro_token))
		{
			hold = TRUE;
		}
		else
		{
			hold = FALSE;
		}
		eat_current_token();

		if(TRUE == defined_has_paren)
		{
			if(NULL == macro_token)
			{
				/* there is no token left to take a source position from */
				fputs("unterminated define ( statement\n", stderr);
				exit(EXIT_FAILURE);
			}
			require(')' == macro_token->s[0], "missing close parenthesis for defined()\n");
			eat_current_token();
		}

		return hold;
	}
	else if(in_set(macro_token->s[0], "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"))
	{
		return macro_variable();
	}
	else if(in_set(macro_token->s[0], "0123456789"))
	{
		return macro_number();
	}
	else
	{
		return 0;    /* FIXME: error handling */
	}
}

/*
 * Evaluate a primary followed by an optional arithmetic or shift operator
 * (+ - * / % >> <<) and its right-hand side.
 *
 * The right-hand side is obtained by calling this function again, so all of
 * these operators share one level and a chain such as "a - b - c" is
 * grouped from the right.  Division and modulus by zero abort through
 * require().  Reaching the end of the token list after the primary is fatal.
 */
int macro_additive_expr()
{
	int lhs = macro_primary_expr();
	int rhs;
	char* op;

	require(NULL != macro_token, "got an EOF terminated macro additive expression\n");
	op = macro_token->s;

	if(match("+", op))
	{
		eat_current_token();
		return lhs + macro_additive_expr();
	}

	if(match("-", op))
	{
		eat_current_token();
		return lhs - macro_additive_expr();
	}

	if(match("*", op))
	{
		eat_current_token();
		return lhs * macro_additive_expr();
	}

	if(match("/", op))
	{
		eat_current_token();
		rhs = macro_additive_expr();
		require(0 != rhs, "divide by zero not valid even in C macros\n");
		return lhs / rhs;
	}

	if(match("%", op))
	{
		eat_current_token();
		rhs = macro_additive_expr();
		require(0 != rhs, "modulus by zero not valid even in C macros\n");
		return lhs % rhs;
	}

	if(match(">>", op))
	{
		eat_current_token();
		return lhs >> macro_additive_expr();
	}

	if(match("<<", op))
	{
		eat_current_token();
		return lhs << macro_additive_expr();
	}

	/* no operator follows: the primary is the whole value */
	return lhs;
}

/*
 * Evaluate an additive expression followed by an optional comparison
 * (< <= >= > == !=) and its right-hand side.
 *
 * The right-hand side is obtained by calling this function again, so a
 * chain of comparisons is grouped from the right.  The result of a
 * comparison is the C truth value of the operator.
 */
int macro_relational_expr()
{
	int lhs = macro_additive_expr();
	char* op = macro_token->s;

	if(match("<", op))
	{
		eat_current_token();
		return lhs < macro_relational_expr();
	}

	if(match("<=", op))
	{
		eat_current_token();
		return lhs <= macro_relational_expr();
	}

	if(match(">=", op))
	{
		eat_current_token();
		return lhs >= macro_relational_expr();
	}

	if(match(">", op))
	{
		eat_current_token();
		return lhs > macro_relational_expr();
	}

	if(match("==", op))
	{
		eat_current_token();
		return lhs == macro_relational_expr();
	}

	if(match("!=", op))
	{
		eat_current_token();
		return lhs != macro_relational_expr();
	}

	/* no comparison follows */
	return lhs;
}

/*
 * Evaluate a relational expression followed by an optional bitwise or
 * logical operator (& && | || ^) and its right-hand side.
 *
 * The right-hand side is obtained by calling this function again, so a
 * chain is grouped from the right.  For && and || the right-hand side is
 * always parsed before the operator is applied.
 */
int macro_bitwise_expr()
{
	int lhs = macro_relational_expr();
	int rhs;
	char* op = macro_token->s;

	if(match("&", op))
	{
		eat_current_token();
		return lhs & macro_bitwise_expr();
	}

	if(match("&&", op))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs && rhs;
	}

	if(match("|", op))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs | rhs;
	}

	if(match("||", op))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs || rhs;
	}

	if(match("^", op))
	{
		eat_current_token();
		rhs = macro_bitwise_expr();
		return lhs ^ rhs;
	}

	/* no operator follows */
	return lhs;
}

/* Entry point of the macro expression evaluator: evaluates the expression
 * that starts at macro_token and returns its integer value */
int macro_expression()
{
	return macro_bitwise_expr();
}

/*
 * Process a #define directive; macro_token is on the "#define" token.
 *
 * The macro name, an optional parenthesised parameter list and the
 * replacement tokens up to the end of the line are removed from the token
 * stream.  A "//" comment ends the line; a token starting with "/" "*" is
 * skipped.  On return macro_token is on the token that ends the line.
 *
 * Outside of an excluded conditional block the new macro is put at the
 * front of macro_env; its expansion is the chain of removed replacement
 * tokens, cut off after the last one.  Inside an excluded block the tokens
 * are still removed but no macro is recorded.
 */
void handle_define()
{
	struct macro_list* hold;
	struct token_list* arg;
	struct token_list* expansion_end = NULL;

	/* don't use #define statements from non-included blocks */
	int conditional_define = TRUE;
	if(NULL != conditional_inclusion_top)
	{
		if(FALSE == conditional_inclusion_top->include)
		{
			conditional_define = FALSE;
		}
	}

	/* discard the #define itself */
	eat_current_token();

	require(NULL != macro_token, "got an EOF terminated #define\n");
	require('\n' != macro_token->s[0], "unexpected newline after #define\n");

	/* insert new macro */
	hold = calloc(1, sizeof(struct macro_list));
	hold->symbol = macro_token->s;
	hold->next = macro_env;
	/* provided it isn't in a non-included block */
	if(conditional_define) macro_env = hold;

	/* discard the macro name; whitespace is kept because a "(" only starts
	 * a parameter list when it directly follows the name */
	eat_current_token_without_space();
	require(NULL != macro_token, "got an EOF terminated #define\n");

	/* Handle macro arguments */
	if(macro_token->s[0] == '(')
	{
		/* discard ( */
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated #define\n");
		if(macro_token->s[0] != ')')
		{
			arg = calloc(1, sizeof(struct token_list));
			arg->s = macro_token->s;
			hold->arguments = arg;
			eat_current_token();
			require(NULL != macro_token, "incomplete macro call\n");
			while(macro_token->s[0] == ',')
			{
				eat_current_token();
				require(NULL != macro_token, "incomplete macro call, got an EOF instead of an argument\n");
				/* parameters are pushed on the front, hence the reversal below */
				arg = calloc(1, sizeof(struct token_list));
				arg->s = macro_token->s;
				arg->next = hold->arguments;
				hold->arguments = arg;
				eat_current_token();
				require(NULL != macro_token, "incomplete macro call\n");
			}
		}
		/* discard ) */
		eat_current_token();

		/* Reverse argument list */
		hold->arguments = reverse_list(hold->arguments);

		require(NULL != macro_token, "got an EOF terminated #define\n");
	}
	else if(macro_token->s[0] == ' ')
	{
		eat_current_token();
	}

	while (TRUE)
	{
		require(NULL != macro_token, "got an EOF terminated #define\n");

		/* a // comment ends the definition exactly like a newline does */
		if(('/' == macro_token->s[0]) && ('/' == macro_token->s[1]))
		{
			macro_token->s = "\n";
		}

		if ('\n' == macro_token->s[0])
		{
			if(NULL == expansion_end)
			{
				/* No replacement token was seen, so hold has not been
				 * released by the loop body yet */
				if(conditional_define) hold->expansion = NULL;
				else free(hold);
				return;
			}
			/* cut the expansion chain after its last token */
			expansion_end->next = NULL;
			return;
		}
		else if(('/' == macro_token->s[0]) && ('*' == macro_token->s[1]))
		{
			eat_current_token();
			continue;
		}

		/* the macro is not being recorded; just drop the token */
		if(NULL == hold)
		{
			eat_current_token();
			continue;
		}

		expansion_end = macro_token;

		/* in the first iteration, we set the first token of the expansion, if
		   it exists */
		if (NULL == hold->expansion)
		{
			hold->expansion = macro_token;
		}

		/* throw away if not used */
		if(!conditional_define)
		{
			free(hold);
			hold = NULL;
		}

		eat_current_token();
	}
}

/* Process a #undef directive; macro_token is on the "#undef" token */
void handle_undef()
{
	/* discard the #undef itself */
	eat_current_token();
	/* macro_token now names the macro to forget */
	remove_macro(macro_token);
	/* discard the macro name */
	eat_current_token();
}

void handle_error(int warning_p)
{
	/* don't use #error statements from non-included blocks */
	int conditional_error = TRUE;
	if(NULL != conditional_inclusion_top)
	{
		if(FALSE == conditional_inclusion_top->include)
		{
			conditional_error = FALSE;
		}
	}
	eat_current_token();
	/* provided it isn't in a non-included block */
	if(conditional_error)
	{
		line_error_token(macro_token);
		if(warning_p) fputs(" warning: #warning ", stderr);
		else fputs(" error: #error ", stderr);
		while (TRUE)
		{
			if ('\n' == macro_token->s[0]) break;
			fputs(macro_token->s, stderr);
			macro_token = macro_token->next;
		}
		fputs("\n", stderr);
		if(!warning_p) exit(EXIT_FAILURE);
	}
	while (TRUE)
	{
		/* discard the error */
		if ('\n' == macro_token->s[0])
		{
			return;
		}
		eat_current_token();
	}
}

/*
 * Handle the preprocessor directive whose first token is at macro_token.
 *
 *   #if #ifdef #ifndef   push a new entry on the conditional stack
 *   #elif #else          update the entry on top of the stack
 *   #endif               pop the stack
 *   #define #undef       see handle_define / handle_undef
 *   #error #warning      see handle_error
 *   #include             calls expand_includes when the enclosing block is
 *                        included, then drops the rest of the line
 *   #FILENAME and "# "   the line is dropped
 *   anything else        a warning is printed and the line is dropped
 *
 * The tokens of the directive are removed from the token stream.
 */
void macro_directive()
{
	struct conditional_inclusion* t;
	struct macro_list* defined;
	int result;

	/* FIXME: whitespace is allowed between "#" and "if" */
	if(match("#if", macro_token->s))
	{
		eat_current_token();
		/* evaluate constant integer expression */
		result = macro_expression();

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;

		if(FALSE == result) t->include = FALSE;
		else t->include = TRUE;

		t->previous_condition_matched = t->include;
	}
	else if(match("#ifdef", macro_token->s))
	{
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");
		defined = lookup_macro(macro_token);
		eat_current_token();

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;

		/* the block is kept when the name is defined */
		if(NULL != defined) t->include = TRUE;
		else t->include = FALSE;

		t->previous_condition_matched = t->include;
	}
	else if(match("#ifndef", macro_token->s))
	{
		eat_current_token();
		require(NULL != macro_token, "got an EOF terminated macro defined expression\n");
		defined = lookup_macro(macro_token);
		eat_current_token();

		/* push conditional inclusion */
		t = calloc(1, sizeof(struct conditional_inclusion));
		t->prev = conditional_inclusion_top;
		conditional_inclusion_top = t;

		/* the block is kept when the name is NOT defined */
		if(NULL != defined) t->include = FALSE;
		else t->include = TRUE;

		t->previous_condition_matched = t->include;
	}
	else if(match("#elif", macro_token->s))
	{
		eat_current_token();
		result = macro_expression();
		require(NULL != conditional_inclusion_top, "#elif without leading #if\n");

		/* only the first branch whose condition holds is kept */
		t = conditional_inclusion_top;
		t->include = result && !t->previous_condition_matched;
		t->previous_condition_matched = t->previous_condition_matched || t->include;
	}
	else if(match("#else", macro_token->s))
	{
		eat_current_token();
		require(NULL != conditional_inclusion_top, "#else without leading #if\n");
		conditional_inclusion_top->include = !conditional_inclusion_top->previous_condition_matched;
	}
	else if(match("#endif", macro_token->s))
	{
		if(NULL == conditional_inclusion_top)
		{
			line_error_token(macro_token);
			fputs("unexpected #endif\n", stderr);
			exit(EXIT_FAILURE);
		}

		eat_current_token();

		/* pop conditional inclusion */
		t = conditional_inclusion_top;
		conditional_inclusion_top = t->prev;
		free(t);
	}
	else if(match("#define", macro_token->s))
	{
		handle_define();
	}
	else if(match("#undef", macro_token->s))
	{
		handle_undef();
	}
	else if(match("#error", macro_token->s))
	{
		handle_error(FALSE);
	}
	else if(match("#warning", macro_token->s))
	{
		handle_error(TRUE);
	}
	else if(match("#include", macro_token->s))
	{
		/* the include is acted on outside of any conditional block, or
		 * inside one that is currently being included */
		result = TRUE;
		if(NULL != conditional_inclusion_top)
		{
			if(TRUE != conditional_inclusion_top->include) result = FALSE;
		}

		if(result)
		{
			recursive_expand = recursive_expand + 100;
			require(NULL != macro_token->next, "#include statements require a file to read\n");
			expand_includes(global_token->next, recursive_expand);
		}

		/* drop the rest of the line */
		while(NULL != macro_token)
		{
			if('\n' == macro_token->s[0]) return;
			eat_current_token();
		}
	}
	else if(match("#FILENAME", macro_token->s))
	{
		/* drop the rest of the line */
		while(NULL != macro_token)
		{
			if('\n' == macro_token->s[0]) return;
			eat_current_token();
		}
	}
	else if(('#' == macro_token->s[0]) && (' ' == macro_token->s[1]))
	{
		/* drop the rest of the line */
		while(NULL != macro_token)
		{
			if('\n' == macro_token->s[0]) return;
			eat_current_token();
		}
	}
	else
	{
		/* Put a big fat warning but see if we can just ignore */
		fputs(">>WARNING<<\n>>WARNING<<\n", stderr);
		line_error_token(macro_token);
		fputs("feature: ", stderr);
		fputs(macro_token->s, stderr);
		fputs(" unsupported in M2-Planet\nIgnoring line, may result in bugs\n>>WARNING<<\n>>WARNING<<\n\n", stderr);

		/* unhandled macro directive; let's eat until a newline; om nom nom */
		while(NULL != macro_token)
		{
			if('\n' == macro_token->s[0]) return;
			eat_current_token();
		}
	}
}

/*
 * Produce the replacement list of a macro with its parameters substituted.
 *
 * expansion: replacement tokens of the macro
 * arguments: parameter list; lookup_token yields the tokens bound to a name
 *
 * Works on a copy of expansion.  Every token for which lookup_token returns
 * a list is replaced by copies of that list.  Returns the first token of
 * the resulting list, or NULL when the copy of expansion is empty.
 */
struct token_list* expand_macro_functions(struct token_list* expansion, struct token_list* arguments)
{
	struct token_list* expanded_token;
	struct token_list* head;
	struct token_list* hold = NULL; /* last token known to be in the result */
	head = copy_list(expansion);
	while(NULL != head)
	{
		expanded_token = lookup_token(head, arguments);
		hold = head;
		if(NULL != expanded_token)
		{
			/* put the argument's tokens in front of the parameter name,
			 * then take the name itself out of the list */
			insert_tokens(head, expanded_token);
			hold = head->prev;
			head = eat_token(head);
		}
		else
		{
			head = head->next;
		}
	}

	/* The loop never ran, so there is no list to rewind */
	if(NULL == hold) return NULL;

	/* rewind to the first token of the result */
	while(NULL != hold->prev) hold = hold->prev;
	return hold;
}

void eat_until_endif()
{
	/* This #if block is nested inside of an #if block that needs to be dropped, lose EVERYTHING */
	do
	{
		if(match("#if", macro_token->s) || match("#ifdef", macro_token->s) || match("#ifndef", macro_token->s))
		{
			eat_current_token();
			eat_until_endif();
		}

		eat_current_token();
		require(NULL != macro_token, "Unterminated #if block\n");
	} while(!match("#endif", macro_token->s));
}

void eat_block()
{
	/* This conditional #if block is wrong, drop everything until the #elif/#else/#endif */
	do
	{
		if(match("#if", macro_token->s) || match("#ifdef", macro_token->s) || match("#ifndef", macro_token->s))
		{
			eat_current_token();
			eat_until_endif();
		}

		eat_current_token();
		require(NULL != macro_token, "Unterminated #if block\n");
	} while(!match("#elif", macro_token->s) && !match("#else", macro_token->s) && !match("#endif", macro_token->s));
}

/*
 * Expand the token if it names a macro.
 *
 * token: token to examine; it must not be NULL and must have a successor
 *
 * Returns the token at which scanning continues:
 *   - token->next when token is not a macro name, or is "__M2__"
 *   - otherwise the macro name (and, for a function-like macro, its
 *     parenthesised argument list) is removed from the list, copies of the
 *     substituted replacement tokens are inserted in its place and the
 *     first inserted token is returned
 */
struct token_list* maybe_expand(struct token_list* token)
{
	if(NULL == token)
	{
		line_error_token(macro_token);
		fputs("maybe_expand passed a null token\n", stderr);
		exit(EXIT_FAILURE);
	}

	struct macro_list* hold = lookup_macro(token);
	struct token_list* hold2;
	struct token_list* hold3;
	struct token_list* hold4;
	if(NULL == token->next)
	{
		line_error_token(macro_token);
		fputs("we can't expand a null token: ", stderr);
		fputs(token->s, stderr);
		fputc('\n', stderr);
		exit(EXIT_FAILURE);
	}

	if (NULL == hold)
	{
		return token->next;
	}

	if(match("__M2__", token->s)) return token->next;

	token = eat_token(token);

	if (NULL == hold->expansion)
	{
		/* NOTE(review): token already is the one that followed the macro
		 * name, so this steps over it unexamined - confirm intended */
		return token->next;
	}

	/* Match macro arguments with stored names */
	hold3 = hold->arguments;
	if(NULL != hold3)
	{
		/* Forget the values bound by an earlier use of this macro; the
		 * loop below only ever adds to ->expansion, so leftovers would
		 * end up in this expansion too */
		hold2 = hold3;
		while(NULL != hold2)
		{
			hold2->expansion = NULL;
			hold2 = hold2->next;
		}

		if(token->s[0] == ' ')
		{
			token = eat_token(token);
		}
		require(NULL != token, "got an EOF terminated macro function\n");
		require('(' == token->s[0], "missing open parenthesis for macro function\n");
		token = eat_token(token);
		require(NULL != token, "got an EOF terminated macro function\n");
		do
		{
			/* push the token on the value of the current parameter;
			 * hold->arguments is used as the cursor over the parameters */
			hold2 = calloc(1, sizeof(struct token_list));
			hold2->s = token->s;
			hold2->next = hold->arguments->expansion;
			hold->arguments->expansion = hold2;
			token = eat_token(token);
			require(NULL != token, "incomplete macro call\n");
			if(token->s[0] == ',')
			{
				/* value complete: restore its order, go to the next parameter */
				hold->arguments->expansion = reverse_list(hold->arguments->expansion);
				hold->arguments = hold->arguments->next;
				require(NULL != hold->arguments, "too many arguments in macro call\n");
				token = eat_token(token);
				require(NULL != token, "incomplete macro call\n");
			}
		} while(token->s[0] != ')');
		hold->arguments->expansion = reverse_list(hold->arguments->expansion);
		/* put the head of the parameter list back */
		hold->arguments = hold3;
		/* discard ) */
		token = eat_token(token);
	}
	hold4 = expand_macro_functions(hold->expansion, hold->arguments);
	hold4 = insert_tokens(token, hold4);

	return hold4;
}

/*
 * Run the macro pass over the whole global_token list.
 *
 * A token starting with '#' at the start of a line is handed to
 * macro_directive, which has to stop on a newline (or at the end of the
 * list); anything else is a fatal error.  Other tokens are either passed to
 * maybe_expand or, inside an excluded conditional block, removed by
 * eat_block.
 */
void preprocess()
{
	int start_of_line = TRUE;
	int excluded;
	macro_token = global_token;

	while(NULL != macro_token)
	{
		if(start_of_line && '#' == macro_token->s[0])
		{
			macro_directive();

			/* end of input, or the directive ended cleanly on its newline */
			if(NULL == macro_token) continue;
			if('\n' == macro_token->s[0]) continue;

			line_error_token(macro_token);
			fputs("newline expected at end of macro directive\n", stderr);
			fputs("found: '", stderr);
			fputs(macro_token->s, stderr);
			fputs("'\n", stderr);
			exit(EXIT_FAILURE);
		}
		else if('\n' == macro_token->s[0])
		{
			start_of_line = TRUE;
			macro_token = macro_token->next;
		}
		else
		{
			start_of_line = FALSE;

			/* are we inside a conditional block that is being skipped? */
			excluded = FALSE;
			if(NULL != conditional_inclusion_top)
			{
				if(!conditional_inclusion_top->include) excluded = TRUE;
			}

			if(excluded)
			{
				/* rewrite the token stream to exclude the current token */
				eat_block();
				start_of_line = TRUE;
			}
			else
			{
				macro_token = maybe_expand(macro_token);
			}
		}
	}
}

M cc_reader.c => cc_reader.c +14 -5
@@ 214,8 214,9 @@ struct token_list* reverse_list(struct token_list* head)
	return root;
}

void insert_file_header(char* name, int line)
struct token_list* make_file_header(char* name, int line)
{
	struct token_list* r;
	if(C_PREPROCESS) line = line-1;
	char* hold_line = int2str(line, 10, FALSE);



@@ 237,8 238,7 @@ void insert_file_header(char* name, int line)
			strcat(hold_string, "\" ");
		}
		strcat(hold_string, " ");
		token = new_token(hold_string, strlen(hold_string)+2, name, line, token, token);
		token = new_token("\n", 3, name, line, token, token);
		r = new_token(hold_string, strlen(hold_string)+2, name, line, NULL, NULL);
	}
	else
	{


@@ 247,9 247,18 @@ void insert_file_header(char* name, int line)
		strcat(hold_string, name);
		strcat(hold_string, " ");
		strcat(hold_string, hold_line);
		token = new_token(hold_string, strlen(hold_string)+2, name, line, token, token);
		token = new_token("\n", 3, name, line, token, token);
		r = new_token(hold_string, strlen(hold_string)+2, name, line, NULL, NULL);
	}
	return r;
}

/*
 * Put a file header token, followed by a "\n" token, on the global token
 * list.
 *
 * name: file name handed to make_file_header and new_token
 * line: line number handed to make_file_header and new_token
 *
 * The header token gets the old value of token as both ->next and ->prev;
 * token finally refers to the new "\n" token.
 */
void insert_file_header(char* name, int line)
{
	struct token_list* header = make_file_header(name, line);

	header->prev = token;
	header->next = token;
	token = header;

	/* the header line is closed by its own newline token */
	token = new_token("\n", 3, name, line, header, header);
}

struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename)

M cpp.c => cpp.c +96 -4
@@ 22,11 22,69 @@

int strtoint(char *a);
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);
void dump_stage_straight();
void stage1_preprocess();
void dump_stage_straight(struct token_list* token);
void dump_stage_raw(struct token_list* token);
struct token_list* stage1_preprocess(struct token_list* a, int outer_stage);
void init_macro_env(char* sym, char* value, char* source, int num);
void preprocess();

/*
 * Register the predefined macro(s) for the requested target architecture.
 *
 * ARCH is compared against the supported names; on a hit the matching
 * macro(s) are defined through init_macro_env with "--architecture" as their
 * source (and a note is written to stderr when DEBUG_LEVEL is 4 or higher).
 * Any other value prints the list of supported names to stderr and ends the
 * program with EXIT_FAILURE.
 */
void setup_architecture(char* ARCH)
{
	/* Set desired architecture */
	if(match("knight-native", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using knight-native architecture\n", stderr);
		init_macro_env("__knight-native__", "1", "--architecture", 0);
	}
	else if(match("knight-posix", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using knight-posix architecture\n", stderr);
		init_macro_env("__knight-posix__", "1", "--architecture", 0);
	}
	else if(match("x86", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using x86 architecture\n", stderr);
		init_macro_env("__i386__", "1", "--architecture", 0);
	}
	else if(match("amd64", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using amd64 architecture\n", stderr);
		init_macro_env("__x86_64__", "1", "--architecture", 0);
	}
	else if(match("armv7l", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using armv7l architecture\n", stderr);
		init_macro_env("__arm__", "1", "--architecture", 0);
	}
	else if(match("aarch64", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using aarch64 architecture\n", stderr);
		init_macro_env("__aarch64__", "1", "--architecture", 0);
	}
	else if(match("riscv32", ARCH))
	{
		/* Both RISC-V variants define __riscv; only __riscv_xlen differs */
		if(4 <= DEBUG_LEVEL) fputs("Using riscv32 architecture\n", stderr);
		init_macro_env("__riscv", "1", "--architecture", 0);
		init_macro_env("__riscv_xlen", "32", "--architecture", 1);
	}
	else if(match("riscv64", ARCH))
	{
		if(4 <= DEBUG_LEVEL) fputs("Using riscv64 architecture\n", stderr);
		init_macro_env("__riscv", "1", "--architecture", 0);
		init_macro_env("__riscv_xlen", "64", "--architecture", 1);
	}
	else
	{
		fputs("Unknown architecture: ", stderr);
		/* Handing NULL to fputs is undefined, so print a placeholder instead */
		if(NULL != ARCH) fputs(ARCH, stderr);
		else fputs("(null)", stderr);
		fputs(" known values are: knight-native, knight-posix, x86, amd64, armv7l, aarch64, riscv32 and riscv64\n", stderr);
		exit(EXIT_FAILURE);
	}
}

void prechecks(int argc, char** argv)
{
	int env=1;
	char* hold;
	int i = 1;
	while(i <= argc)


@@ 68,6 126,7 @@ void prechecks(int argc, char** argv)
				fputs(hold, stderr);
				fputc('\n', stderr);
			}
			setup_architecture(Architecture);
			i += 2;
		}
		else if(match(argv[i], "--max-string"))


@@ 105,6 164,28 @@ void prechecks(int argc, char** argv)
			M2LIBC_PATH = hold;
			i += 2;
		}
		else if(match(argv[i], "-D") || match(argv[i], "--define-macro"))
		{
			hold = argv[i+1];
			if(NULL == hold)
			{
				fputs("-D requires an argument", stderr);
				exit(EXIT_FAILURE);
			}
			while(0 != hold[0])
			{
				if('=' == hold[0])
				{
					hold[0] = 0;
					hold = hold + 1;
					break;
				}
				hold = hold + 1;
			}
			init_macro_env(argv[i+1], hold, "__ARGV__", env);
			env = env + 1;
			i += 2;
		}
		else if(match(argv[i], "-o") || match(argv[i], "--output"))
		{
			DESTINATION_NAME = argv[i + 1];


@@ 226,6 307,11 @@ int main(int argc, char** argv)
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-D") || match(argv[i], "--define-macro"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-h") || match(argv[i], "--help"))
		{
			fputs(


@@ 272,7 358,13 @@ int main(int argc, char** argv)
		}
	}

	if(C_PREPROCESS) stage1_preprocess();
	if(C_PREPROCESS) global_token = stage1_preprocess(global_token, 0);

	if(6 == DUMP_STAGE) dump_stage_raw(global_token);
	if(7 == DUMP_STAGE) dump_stage_straight(global_token);

	preprocess();

	dump_stage_straight();
	if(6 == DUMP_STAGE) dump_stage_raw(global_token);
	dump_stage_straight(global_token);
}

A dump.c => dump.c +77 -0
@@ 0,0 1,77 @@
/* Copyright (C) 2016, 2021 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * Copyright (C) 2020 Gabriel Wicki
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "cc_globals.h"

/*
 * Debug dump of a token list.
 *
 * Writes a legend followed by one { "token", file name, line number } record
 * per token to DESTINATION_FILE, then ends the program with EXIT_SUCCESS.
 * This function never returns to its caller.
 */
void dump_stage_raw(struct token_list* token)
{
	struct token_list* i = token;
	char* hold;
	fputs("{token,\n file name,\n line number,\n}\n", DESTINATION_FILE);
	while(NULL != i)
	{
		/* i cannot be NULL inside the loop, so only its fields are checked */
		fputs("{ \"", DESTINATION_FILE);
		if(NULL == i->s) fputs(":NULL:", DESTINATION_FILE);
		/* Show a newline token as the two characters \n to keep records readable */
		else if(match("\n", i->s)) fputs("\\n", DESTINATION_FILE);
		else fputs(i->s, DESTINATION_FILE);

		fputs("\",\n ", DESTINATION_FILE);
		if(NULL == i->filename) fputs(":::NULL:::", DESTINATION_FILE);
		else fputs(i->filename, DESTINATION_FILE);

		fputs(",\n ", DESTINATION_FILE);
		hold = int2str(i->linenumber, 10, FALSE);
		if(NULL == hold) fputs("::::NULL::::", DESTINATION_FILE);
		else fputs(hold, DESTINATION_FILE);

		fputs("\n }\n", DESTINATION_FILE);
		i = i->next;
	}
	exit(EXIT_SUCCESS);
}


/*
 * Write the text of every token in the list to DESTINATION_FILE, each one
 * followed by a single space, then end the program with EXIT_SUCCESS.
 * This function never returns to its caller.
 */
void dump_stage_straight(struct token_list* token)
{
	struct token_list* cursor;
	for(cursor = token; NULL != cursor; cursor = cursor->next)
	{
		fputs(cursor->s, DESTINATION_FILE);
		fputc(' ', DESTINATION_FILE);
	}
	exit(EXIT_SUCCESS);
}

/*
 * Print the "filename:line:" prefix of an error message to stderr for the
 * given token.  When token is NULL a fixed end-of-file notice is printed
 * instead.  The caller is expected to print the actual message afterwards.
 */
void line_error_token(struct token_list *token)
{
	if(NULL != token)
	{
		fputs(token->filename, stderr);
		fputs(":", stderr);
		fputs(int2str(token->linenumber, 10, TRUE), stderr);
		fputs(":", stderr);
		return;
	}

	/* No token means there is no location left to report */
	fputs("EOF reached inside of line_error\n", stderr);
	fputs("problem at end of file\n", stderr);
}

A include_expansion.c => include_expansion.c +99 -0
@@ 0,0 1,99 @@
/* Copyright (C) 2016, 2021 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * Copyright (C) 2020 Gabriel Wicki
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "cc_globals.h"

struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);
struct token_list* stage1_preprocess(struct token_list* token, int outer_stage);
struct token_list* make_file_header(char* name, int line);
struct token_list* new_token(char* s, int size, char* filename, int linenum, struct token_list* prev, struct token_list* next);

/*
 * Expand the first #include found at or after `token`.
 *
 * token: start of the token list to search; a "#include" token must follow.
 * depth: handed to stage1_preprocess as its outer_stage argument when the
 *        included file is preprocessed.
 *
 * A name starting with '<' is opened relative to M2LIBC_PATH, a name starting
 * with '"' is opened as written.  The tokens of that file, preceded by a file
 * header and a newline token, are run through stage1_preprocess and spliced
 * into the list directly after the include's file-name token.
 *
 * Overwrites hold_string.  Every failure (no #include, no file name, missing
 * library path, unopenable file, malformed name) ends the program.
 */
void expand_includes(struct token_list* token, int depth)
{
	struct token_list* i = token;
	struct token_list* hold;
	struct token_list* reset;
	FILE* library;

	/* Find the #include token; never dereference past the end of the list */
	require(NULL != i, "expand_includes not passed a #include\n");
	while(!match(i->s, "#include"))
	{
		i = i->next;
		require(NULL != i, "expand_includes not passed a #include\n");
	}

	/* Step to the file name, skipping blanks; check for EOF before each use */
	i = i->next;
	require(NULL != i, "You put a #include without anything to include\n");
	while(match(" ", i->s) || match("\t", i->s))
	{
		i = i->next;
		require(NULL != i, "You put a #include without anything to include\n");
	}
	require(NULL != M2LIBC_PATH, "You need to set a library path using --include-library-directory\n");

	/* Build the path in hold_string, still carrying the closing > or " */
	if('<' == i->s[0])
	{
		reset_hold_string();
		strcat(hold_string, M2LIBC_PATH);
		strcat(hold_string, "/");
		strcat(hold_string, i->s + 1);
	}
	else if('"' == i->s[0])
	{
		reset_hold_string();
		strcat(hold_string, i->s + 1);
	}
	else
	{
		line_error_token(i);
		fputs("not a valid name for a #include statement", stderr);
		exit(EXIT_FAILURE);
	}

	/* Drop the last character of the path (the closing delimiter of the name) */
	hold_string[strlen(hold_string)-1] = 0;
	library = fopen(hold_string, "r");
	if(NULL == library)
	{
		fputs("The file: ", stderr);
		fputs(hold_string, stderr);
		fputs(" does not exist\n", stderr);
		exit(EXIT_FAILURE);
	}
	/* NOTE(review): library is not closed here; confirm whether read_all_tokens closes it */
	hold = read_all_tokens(library, NULL, i->s);

	/* Put a file header and a newline token in front of the included tokens */
	reset = make_file_header(i->filename, i->linenumber);
	reset->next = hold;
	reset->prev = hold;
	hold = reset;
	hold = new_token("\n", 3, i->filename, i->linenumber, hold, hold);
	hold = stage1_preprocess(hold, depth);

	/* Splice the included tokens between the file name and whatever followed it */
	reset = i->next;
	i->next = hold;
	hold->prev = i;
	while(NULL != hold->next) hold = hold->next;
	hold->next = reset;
	/* The include may have been the very last thing in the list */
	if(NULL != reset) reset->prev = hold;
}

A makefile => makefile +111 -0
@@ 0,0 1,111 @@
## Copyright (C) 2017 Jeremiah Orians
## Copyright (C) 2020-2021 deesix <deesix@tuta.io>
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.

# Let make search these directories so outputs that already exist there are not rebuilt
VPATH = bin:test:test/results
PACKAGE = m2-planet

# C compiler settings
CC?=gcc
CFLAGS:=$(CFLAGS) -D_GNU_SOURCE -D VERSION=\"Release_0.0.0-0-000000\" -O0 -std=c99 -ggdb

all: M3-Preprocess
.NOTPARALLEL:
# Build the preprocessor into bin/ with a single compiler invocation.
# bin appears only after the | (order-only): it must exist before linking, but
# a change of the directory's timestamp must not force a rebuild.  The shared
# headers are prerequisites so that editing them triggers a rebuild.
M3-Preprocess: cpp.c cc_reader.c cc_globals.c dump.c c_stage1.c include_expansion.c cc_macro.c M2libc/bootstrappable.c cc.h cc_globals.h | bin
	$(CC) $(CFLAGS) \
	M2libc/bootstrappable.c \
	cpp.c \
	cc_reader.c \
	include_expansion.c \
	cc_macro.c \
	cc_globals.c \
	dump.c \
	c_stage1.c \
	-o bin/M3-Preprocess

# Clean up after ourselves: remove the bin/ and test/results/ directories,
# then run test/cleanup_test.sh once for every test number listed below.
# Each call is its own recipe line, so a failing script stops the target.
.PHONY: clean
clean:
	rm -rf bin/ test/results/
	./test/cleanup_test.sh 0000
	./test/cleanup_test.sh 0001
	./test/cleanup_test.sh 0002
	./test/cleanup_test.sh 0003
	./test/cleanup_test.sh 0004
	./test/cleanup_test.sh 0005
	./test/cleanup_test.sh 0006
	./test/cleanup_test.sh 0007
	./test/cleanup_test.sh 0008
	./test/cleanup_test.sh 0009
	./test/cleanup_test.sh 0010
	./test/cleanup_test.sh 0011
	./test/cleanup_test.sh 0012
	./test/cleanup_test.sh 0013
	./test/cleanup_test.sh 0014
	./test/cleanup_test.sh 0015
	./test/cleanup_test.sh 0016
	./test/cleanup_test.sh 0017
	./test/cleanup_test.sh 0018
	./test/cleanup_test.sh 0019
	./test/cleanup_test.sh 0020
	./test/cleanup_test.sh 0021
	./test/cleanup_test.sh 0022
	./test/cleanup_test.sh 0023
	./test/cleanup_test.sh 0024
	./test/cleanup_test.sh 0025
	./test/cleanup_test.sh 0100
	./test/cleanup_test.sh 0101
	./test/cleanup_test.sh 0102
	./test/cleanup_test.sh 0103
	./test/cleanup_test.sh 0104
	./test/cleanup_test.sh 0105
	./test/cleanup_test.sh 0106
	./test/cleanup_test.sh 1000

# Directories
bin:
	mkdir -p $@

# Installation layout: the binary is copied to $(DESTDIR)$(PREFIX)/bin.
# DESTDIR is empty and PREFIX is /usr/local unless set on the make command line.
DESTDIR:=
PREFIX:=/usr/local
bindir:=$(DESTDIR)$(PREFIX)/bin
# Create the destination directory if needed, then copy the prerequisite
# ($^ is M3-Preprocess as make resolved it) into it.
.PHONY: install
install: M3-Preprocess
	mkdir -p $(bindir)
	cp $^ $(bindir)

### dist
### Build a source tarball named $(PACKAGE)-<version>.tar.gz from the files tracked by git
.PHONY: dist

# COMMIT is the output of `git describe --dirty`; TARBALL_VERSION is the same
# text with a leading "Release_" removed.
COMMIT=$(shell git describe --dirty)
TARBALL_VERSION=$(COMMIT:Release_%=%)
TARBALL_DIR:=$(PACKAGE)-$(TARBALL_VERSION)
TARBALL=$(TARBALL_DIR).tar.gz
# Be friendly to Debian; avoid using EPOCH
# (MTIME is the commit timestamp of HEAD, used as the mtime of every archived file)
MTIME=$(shell git show HEAD --format=%ct --no-patch)
# Reproducible tarball
TAR_FLAGS=--sort=name --mtime=@$(MTIME) --owner=0 --group=0 --numeric-owner --mode=go=rX,u+rw,a-s

# Feed the list of tracked files to tar on stdin, prefix every archived path
# with $(TARBALL_DIR)/ and gzip the stream into the target.
# NOTE(review): this rule has no prerequisites, so $^ expands to nothing here;
# confirm whether the echo is still needed.
$(TARBALL):
	(git ls-files					\
	    --exclude=$(TARBALL_DIR);			\
	    echo $^ | tr ' ' '\n')			\
	    | tar $(TAR_FLAGS)				\
	    --transform=s,^,$(TARBALL_DIR)/,S -T- -cf-	\
	    | gzip -c --no-name > $@

dist: $(TARBALL)