~oriansj/M3-Preprocess

d5be987c9f65b5d01aa39754d6d8f6e025e1eea5 — Jeremiah Orians 1 year, 6 months ago
Import logic from M2-Mesoplanet
7 files changed, 738 insertions(+), 0 deletions(-)

A .gitmodules
A M2libc
A cc.h
A cc_globals.c
A cc_globals.h
A cc_reader.c
A cpp.c
A  => .gitmodules +3 -0
@@ 1,3 @@
[submodule "M2libc"]
	path = M2libc
	url = https://git.sr.ht/~oriansj/M2libc

A  => M2libc +1 -0
@@ 1,1 @@
Subproject commit 3f0c58228f4fadf7e2976c5f37da88c9c5cdfe5f

A  => cc.h +70 -0
@@ 1,70 @@
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>


#define FALSE 0
#define TRUE 1


int in_set(int c, char* s);
int match(char* a, char* b);
void require(int bool, char* error);
char* int2str(int x, int base, int signed_p);
void reset_hold_string();


/* Description of a C type.
 * NOTE(review): nothing in this commit reads or writes these fields, so the
 * per-field notes are inferred from the field names only -- confirm against
 * the M2-Planet sources this structure was imported from.
 */
struct type
{
	struct type* next;     /* presumably the next entry in a list of types */
	int size;              /* presumably the size of the type in bytes */
	int offset;            /* presumably the offset of a member inside its parent */
	int is_signed;         /* presumably TRUE for signed integer types */
	struct type* indirect; /* presumably the pointer-to-this-type entry */
	struct type* members;  /* presumably the member list of a struct/union */
	struct type* type;     /* presumably the underlying type */
	char* name;            /* presumably the spelling of the type */
};

/* One node of the token list built by the reader.
 * The anonymous unions make their members share storage, so only one member
 * of each union is meaningful at a time. In this commit the reader only
 * writes next, prev, s, filename and linenumber.
 */
struct token_list
{
	/* link to the neighbouring node; new_token points it at the token created just before */
	struct token_list* next;
	union
	{
		struct token_list* locals; /* NOTE(review): unused in this commit -- meaning not visible here */
		struct token_list* prev;   /* back link; written by new_token and kept up to date by eat_token */
	};
	/* text of the token */
	char* s;
	union
	{
		struct type* type;         /* NOTE(review): unused in this commit -- meaning not visible here */
		char* filename;            /* file name that was active when the token was created */
	};
	union
	{
		struct token_list* arguments; /* NOTE(review): unused in this commit */
		struct token_list* expansion; /* NOTE(review): unused in this commit */
		int depth;                    /* NOTE(review): unused in this commit */
		int linenumber;               /* value of the reader's line counter when the token was created */
	};
};

#include "cc_globals.h"

A  => cc_globals.c +53 -0
@@ 1,53 @@
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

/* What types we have */
/* NOTE(review): neither list is touched by the code in this commit */
struct type* global_types;
struct type* prim_types;

/* What we are currently working on */
/* head of the token list; main stores the result of read_all_tokens here */
struct token_list* global_token;

/* Output reorder collections */
/* NOTE(review): not used by the code in this commit */
struct token_list* output_list;
struct token_list* strings_list;
struct token_list* globals_list;

/* Make our string collection more efficient */
/* scratch buffer in which the reader assembles the current token (allocated in main) */
char* hold_string;
/* number of bytes push_byte has stored in hold_string so far */
int string_index;

/* Allow us to have a single settable max string */
/* token length limit; main defaults it to 65536 and --max-string overrides it */
int MAX_STRING;

/* enable preprocessor-only mode */
int PREPROCESSOR_MODE;

/* enable spawn behavior to be effective */
char* M2LIBC_PATH;   /* set by -I / --include-library-directory */
char* Architecture;  /* set by -A / --architecture */
int WORDSIZE;
int ENDIAN;
char* BASEADDRESS;
int STDIO_USED;
char* TEMPDIR;

/* So we don't shoot ourself in the face */
int FUZZING;      /* set to TRUE by --fuzz */
int DIRTY_MODE;
int DUMP_STAGE;   /* set by --display-token-stage; -1 when not requested */
int DEBUG_LEVEL;  /* set by --debug-mode; 0 means quiet */

A  => cc_globals.h +54 -0
@@ 1,54 @@
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

/* What types we have */
extern struct type* global_types;
extern struct type* prim_types;

/* What we are currently working on */
/* head of the token list; main stores the result of read_all_tokens here */
extern struct token_list* global_token;

/* Output reorder collections */
extern struct token_list* output_list;
extern struct token_list* strings_list;
extern struct token_list* globals_list;

/* Make our string collection more efficient */
/* scratch buffer in which the reader assembles the current token */
extern char* hold_string;
/* number of bytes push_byte has stored in hold_string so far */
extern int string_index;

/* Allow us to have a single settable max string */
/* Must be int: the definition in cc_globals.c is int, and it is compared
 * against int counters and assigned from strtoint(). */
extern int MAX_STRING;

/* enable preprocessor-only mode */
extern int PREPROCESSOR_MODE;

/* enable spawn behavior to be effective */
extern char* M2LIBC_PATH;   /* set by -I / --include-library-directory */
extern char* Architecture;  /* set by -A / --architecture */
extern int WORDSIZE;
extern int ENDIAN;
extern char* BASEADDRESS;
extern int STDIO_USED;
extern char* TEMPDIR;

/* So we don't shoot ourself in the face */
extern int FUZZING;      /* set to TRUE by --fuzz */
extern int DIRTY_MODE;
extern int DUMP_STAGE;   /* set by --display-token-stage; -1 when not requested */
extern int DEBUG_LEVEL;  /* set by --debug-mode; 0 means quiet */

A  => cc_reader.c +254 -0
@@ 1,254 @@
/* Copyright (C) 2016 Jeremiah Orians
 * Copyright (C) 2021 Andrius Štikonas <andrius@stikonas.eu>
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
/* Helpers defined outside this file */
/* NOTE(review): env_lookup is declared but not called anywhere in this file */
char* env_lookup(char* variable);
char* int2str(int x, int base, int signed_p);

/* Globals */
/* stream the reader is currently pulling bytes from */
FILE* input;
/* most recently created token; new_token links every new node in front of it */
struct token_list* token;
/* current line number in the input; bumped by grab_byte on every newline */
int line;
/* name of the file being read; stamped onto every token by new_token */
char* file;

/* Fetch the next byte from the current input stream.
 * Every newline (byte value 10) read advances the global line counter.
 * Returns whatever fgetc returned, EOF included.
 */
int grab_byte()
{
	int byte = fgetc(input);
	if(10 == byte)
	{
		line = line + 1;
	}
	return byte;
}

/* Append byte c to hold_string and advance string_index.
 * The length check runs after the store, so string_index is always kept
 * below MAX_STRING before the next byte is written.
 */
void push_byte(int c)
{
	int slot = string_index;
	hold_string[slot] = c;
	string_index = slot + 1;
	require(string_index < MAX_STRING, "Token exceeded MAX_STRING char limit\nuse --max-string number to increase\n");
}

/* Store byte c in the token being built, then read one more byte from the input.
 * Returns the newly read byte (EOF at end of input).
 */
int consume_byte(int c)
{
	int upcoming;
	push_byte(c);
	upcoming = grab_byte();
	return upcoming;
}

/* Copy a quoted literal into the token being built.
 * c is the opening quote character; bytes are consumed until the same
 * character shows up again without a backslash in front of it. Reaching EOF
 * first is reported through require().
 * Returns the byte that follows the closing quote.
 */
int preserve_string(int c)
{
	int quote = c;
	int escaped = FALSE;
	do
	{
		/* a backslash escapes the next byte, unless the backslash itself was escaped */
		if(escaped || ('\\' != c))
		{
			escaped = FALSE;
		}
		else
		{
			escaped = TRUE;
		}
		c = consume_byte(c);
		require(EOF != c, "Unterminated string\n");
	} while(escaped || (quote != c));

	/* store the closing quote and fetch what comes after it */
	return consume_byte(quote);
}


/* Copy the characters of source into target, stopping at the end of source
 * or after max characters, whichever comes first.
 * No terminating NUL is written; the caller must supply a zeroed target.
 */
void copy_string(char* target, char* source, int max)
{
	int copied;
	for(copied = 0; 0 != source[copied]; copied = copied + 1)
	{
		target[copied] = source[copied];
		if(max == (copied + 1))
		{
			break;
		}
	}
}


/* Keep adding bytes to the token being built for as long as they belong to
 * the character set S. c is the first candidate byte.
 * Returns the first byte that is not in S; that byte is not stored.
 */
int preserve_keyword(int c, char* S)
{
	int current;
	for(current = c; in_set(current, S); current = consume_byte(current))
	{
		/* all of the work happens in the loop header */
	}
	return current;
}

/* Overwrite the NUL terminated string s with zero bytes, in place.
 * Clearing stops at the first NUL; require() is handed a failing condition
 * if MAX_STRING bytes get cleared without finding one.
 */
void clear_string(char* s)
{
	int cleared;
	for(cleared = 0; 0 != s[cleared]; )
	{
		s[cleared] = 0;
		cleared = cleared + 1;
		require(MAX_STRING > cleared, "string exceeded max string size while clearing string\n");
	}
}

/* Empty the shared hold_string buffer and set string_index back to 0.
 *
 * Bytes get into hold_string in two ways: through push_byte, which counts
 * them in string_index, and through direct strcat calls, which do not.
 * clear_string stops at the first NUL, so on its own it would leave the tail
 * of a token that contains an embedded NUL byte behind, and that tail would
 * then be glued onto a later token. Wiping the first string_index bytes as
 * well removes everything push_byte wrote.
 */
void reset_hold_string()
{
	int i = 0;

	/* covers NUL terminated content, including text appended with strcat */
	clear_string(hold_string);

	/* covers every byte stored by push_byte, even those after an embedded NUL */
	while(i < string_index)
	{
		hold_string[i] = 0;
		i = i + 1;
	}

	string_index = 0;
}


/* Unlink token from the doubly linked list and return the node that came after it.
 * When the removed node was the first of the list, the caller has to fix up
 * its own head pointer; the removed node itself is left untouched.
 */
struct token_list* eat_token(struct token_list* token)
{
	struct token_list* before = token->prev;
	struct token_list* after = token->next;

	/* let the predecessor skip over the removed node */
	if(NULL != before)
	{
		before->next = after;
	}

	/* and make the successor point back past it */
	if(NULL != after)
	{
		after->prev = before;
	}

	return after;
}


/* Create a token holding a copy of string s and make it the newest token.
 *
 * s    - text of the token
 * size - number of bytes to allocate for the copy; callers pass the text
 *        length plus room for the terminating NUL
 *
 * The node records the global line counter and file name, links to the
 * previously newest token through both next and prev, and then replaces it
 * in the global token pointer. Allocation failures go through require().
 */
void new_token(char* s, int size)
{
	int limit;
	struct token_list* current = calloc(1, sizeof(struct token_list));
	require(NULL != current, "Exhausted memory while getting token\n");

	/* More efficiently allocate memory for string */
	current->s = calloc(size, sizeof(char));
	require(NULL != current->s, "Exhausted memory while trying to copy a token\n");

	/* Never copy more than the allocation holds: the last byte stays 0 as the
	 * terminator (copy_string does not write one), and MAX_STRING remains the
	 * upper bound it always was. copy_string cannot express "copy nothing",
	 * so that case is skipped. */
	limit = size - 1;
	if(MAX_STRING < limit)
	{
		limit = MAX_STRING;
	}
	if(0 < limit)
	{
		copy_string(current->s, s, limit);
	}

	current->prev = token;
	current->next = token;
	current->linenumber = line;
	current->filename = file;
	token = current;
}

/* Read one token into hold_string, starting with the already fetched byte c.
 *
 * Token classes, tested in this order:
 *   - EOF: nothing is stored
 *   - '/': a whole block comment, a whole line comment (newline included),
 *     "/=" or a lone '/'
 *   - whitespace and the operator characters <=>|&!^%*+- : always exactly one
 *     byte, so "*=" and friends come out as two tokens
 *   - '#': the '#' plus the letters, digits and underscores that follow
 *   - letters, digits and '_': an identifier/number run; after the first
 *     byte ':' is accepted as well
 *   - ' or ": a complete quoted literal
 *   - anything else: a single byte
 *
 * Hitting EOF inside a comment is reported through require().
 * Returns the first byte after the token (EOF at end of input); the token
 * text is left in hold_string with its length in string_index.
 */
int get_token(int c)
{
	reset_hold_string();

	if(c == EOF)
	{
		return c;
	}
	else if(c == '/')
	{
		c = consume_byte(c);
		if(c == '*')
		{
			/* Block comment. The '*' of the opening marker must not double as
			 * the '*' of the closing one, so look for a fresh '*' at least
			 * once before testing for '/': the input "/""*""/" opens a
			 * comment, it does not close it. */
			c = consume_byte(c);
			do
			{
				while(c != '*')
				{
					c = consume_byte(c);
					require(EOF != c, "Hit EOF inside of block comment\n");
				}
				c = consume_byte(c);
				require(EOF != c, "Hit EOF inside of block comment\n");
			} while(c != '/');
			c = consume_byte(c);
		}
		else if(c == '/')
		{
			/* line comment: runs up to and including the newline */
			while(c != '\n')
			{
				c = consume_byte(c);
				require(EOF != c, "Hit EOF inside of line comment\n");
			}
			c = consume_byte(c);
		}
		else if(c == '=')
		{
			c = consume_byte(c);
		}
	}
	else if(in_set(c, " \a\b\t\n\v\f\r<=>|&!^%*+-"))
	{
		/* '*' is part of this set, so no separate "*=" case can ever be reached */
		c = consume_byte(c);
	}
	else if('#' == c)
	{
		c = consume_byte(c);
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
	}
	else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
	{
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_:");
	}
	else if(in_set(c, "'\""))
	{
		c = preserve_string(c);
	}
	else
	{
		c = consume_byte(c);
	}

	return c;
}

/* Reverse the direction of the next links of a token list, in place.
 * Only next is rewritten; the prev links are left as they were.
 * Returns the new first node, or NULL when head is NULL.
 */
struct token_list* reverse_list(struct token_list* head)
{
	struct token_list* reversed;
	struct token_list* rest;
	for(reversed = NULL; NULL != head; head = rest)
	{
		/* detach the front node and push it onto the reversed list */
		rest = head->next;
		head->next = reversed;
		reversed = head;
	}
	return reversed;
}

void insert_file_header(char* name, int line)
{
	char* hold_line = int2str(line, 10, FALSE);
	reset_hold_string();
	strcat(hold_string, "#FILENAME ");
	strcat(hold_string, name);
	strcat(hold_string, " ");
	strcat(hold_string, hold_line);
	new_token(hold_string, strlen(hold_string)+2);
	new_token("\n", 3);
}

/* Tokenize the whole stream a and add the tokens to an existing list.
 *
 * a        - stream to read
 * current  - newest token of the list built so far (NULL for an empty list)
 * filename - name recorded on every token and in the #FILENAME marker
 *
 * The #FILENAME marker tokens are created while the line counter is 0; the
 * counter is set to 1 before the first byte of the file is read.
 * Returns the newest token, i.e. the last one that was created.
 */
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename)
{
	int next_byte;

	/* point the reader state at the new file */
	token = current;
	input  = a;
	file = filename;
	line = 0;
	insert_file_header(filename, 1);
	line = 1;

	for(next_byte = grab_byte(); EOF != next_byte; )
	{
		next_byte = get_token(next_byte);
		new_token(hold_string, string_index + 2);
	}

	return token;
}

A  => cpp.c +303 -0
@@ 1,303 @@
/* Copyright (C) 2016, 2021 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * Copyright (C) 2020 Gabriel Wicki
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include"cc.h"
#include <unistd.h>

/* Converts a numeric command line value to an int; defined outside this file */
int strtoint(char *a);
/* Tokenizer entry point from cc_reader.c */
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);

/* Name of the output file; main defaults it to "/dev/stdout" and -o / --output overrides it */
char* destination_name;
/* Stream all output is written to; main defaults it to stdout */
FILE* destination_file;


/* First pass over the command line.
 *
 * Picks up the options that have to be known before any input is read:
 * --debug-mode, --display-token-stage, -A/--architecture, --max-string,
 * -I/--include-library-directory and -o/--output (which also opens the
 * output file). Every one of them takes a value, and a missing value is
 * rejected. All other arguments are skipped here and left for main.
 *
 * argc, argv - the arguments exactly as handed to main
 */
void prechecks(int argc, char** argv)
{
	char* hold;
	int i = 1;
	/* argv[argc] is the NULL terminator, which the first branch steps over */
	while(i <= argc)
	{
		if(NULL == argv[i])
		{
			i += 1;
		}
		else if(match(argv[i], "--debug-mode"))
		{
			hold = argv[i+1];
			/* without this check a trailing --debug-mode hands NULL to strtoint and fputs */
			require(NULL != hold, "--debug-mode requires a numeric argument\n");
			DEBUG_LEVEL = strtoint(hold);
			fputs("DEBUG_LEVEL set to: ", stderr);
			fputs(hold, stderr);
			fputc('\n', stderr);
			i+= 2;
		}
		else if(match(argv[i], "--display-token-stage"))
		{
			hold = argv[i+1];
			/* without this check a trailing --display-token-stage hands NULL to strtoint */
			require(NULL != hold, "--display-token-stage requires a numeric argument\n");
			DUMP_STAGE = strtoint(hold);
			require(0 < DUMP_STAGE, "--display-token-stage did not recieve a valid value\nAbort\nFix your value\n");
			if(1 <= DEBUG_LEVEL)
			{
				fputs("DUMP_STAGE set by --display-token-stage to ", stderr);
				fputs(hold, stderr);
				fputc('\n', stderr);
			}
			i+= 2;
		}
		else if(match(argv[i], "-A") || match(argv[i], "--architecture"))
		{
			hold = argv[i+1];
			require(NULL != hold, "--architecture needs to be passed an architecture\n");
			Architecture = hold;
			if(1 <= DEBUG_LEVEL)
			{
				fputs("ARCHITECTURE set by --architecture to ", stderr);
				fputs(hold, stderr);
				fputc('\n', stderr);
			}
			i += 2;
		}
		else if(match(argv[i], "--max-string"))
		{
			hold = argv[i+1];
			if(NULL == hold)
			{
				fputs("--max-string requires a numeric argument\n", stderr);
				exit(EXIT_FAILURE);
			}
			MAX_STRING = strtoint(hold);
			require(0 < MAX_STRING, "Not a valid string size\nAbort and fix your --max-string\n");
			if(1 <= DEBUG_LEVEL)
			{
				fputs("MAX_STRING set by --max-string to ", stderr);
				fputs(hold, stderr);
				fputc('\n', stderr);
			}
			i += 2;
		}
		else if(match(argv[i], "-I") || match(argv[i], "--include-library-directory"))
		{
			hold = argv[i+1];
			if(NULL == hold)
			{
				fputs("-I requires a PATH\n", stderr);
				exit(EXIT_FAILURE);
			}
			if(1 <= DEBUG_LEVEL)
			{
				fputs("M2LIBC_PATH set by --include-library-directory to ", stderr);
				fputs(hold, stderr);
				fputc('\n', stderr);
			}
			M2LIBC_PATH = hold;
			i += 2;
		}
		else if(match(argv[i], "-o") || match(argv[i], "--output"))
		{
			destination_name = argv[i + 1];
			require(NULL != destination_name, "--output option requires a filename to follow\n");
			destination_file = fopen(destination_name, "w");
			if(NULL == destination_file)
			{
				fputs("Unable to open for writing file: ", stderr);
				fputs(argv[i + 1], stderr);
				fputs("\n Aborting to avoid problems\n", stderr);
				exit(EXIT_FAILURE);
			}
			i += 2;
		}
		else
		{
			/* not a precheck option; main deals with it */
			i += 1;
		}
	}
}

/* Program entry point.
 *
 * 1. Sets the defaults of the global settings.
 * 2. Lets prechecks() pick up the options that must be known up front.
 * 3. Allocates hold_string (MAX_STRING + 4 bytes).
 * 4. Walks the command line a second time: every -f/--file is tokenized and
 *    added to global_token, flags are recorded, options that prechecks()
 *    already consumed are skipped together with their value, and anything
 *    unknown aborts the program.
 * 5. When --display-token-stage 1 was requested, dumps every token with its
 *    file name and line number to the output file.
 *
 * Returns EXIT_SUCCESS; every failure path leaves through exit() or require().
 */
int main(int argc, char** argv)
{
	/****************************************************************************
	 * Zero means no debugging messages and larger positive values means more   *
	 * chatty output. Level 15 means EVERYTHING but 7 should cover most magic   *
	 ****************************************************************************/
	DEBUG_LEVEL = 0;
	DUMP_STAGE = -1;
	/* Our fun globals */
	FUZZING = FALSE;
	MAX_STRING = 65536;
	STDIO_USED = FALSE;
	DIRTY_MODE = FALSE;
	Architecture = NULL;
	destination_name = "/dev/stdout";
	destination_file = stdout;
	global_token = NULL;

	/* Our fun locals */
	/* NOTE(review): C_PREPROCESS and follow_includes are recorded but not acted upon yet */
	int C_PREPROCESS = FALSE;
	FILE* in = stdin;
	char* name;
	int follow_includes = TRUE;

	/* Try to get our needed updates */
	prechecks(argc, argv);

	/* NOTE(review): prechecks only accepts stage values above 0, so this dump
	 * cannot currently be selected from the command line */
	if(0 == DUMP_STAGE)
	{
		fputs("{VARIABLE, PURPOSE, VALUE}\n", destination_file);
		fputs("{DESTINATION_FILE, The target location of where to put the output, ", destination_file);
		fputs(destination_name, destination_file);
		fputs("}\n{ARCHITECTURE, The architectures for which to expand in #IFDEF blobcks, ", destination_file);
		/* Architecture stays NULL unless --architecture was given */
		if(NULL == Architecture) fputs("NULL", destination_file);
		else fputs(Architecture, destination_file);
		fputs("}\n", destination_file);
		exit(EXIT_SUCCESS);
	}

	/* get our max hold string */
	hold_string = calloc(MAX_STRING + 4, sizeof(char));
	require(NULL != hold_string, "Impossible Exhaustion has occured\n");

	int i = 1;
	/* argv[argc] is the NULL terminator, which the first branch steps over */
	while(i <= argc)
	{
		if(NULL == argv[i])
		{
			i += 1;
		}
		else if(match(argv[i], "--debug-mode"))
		{
			/* Handled by prechecks; without this branch the flag was rejected as unknown */
			i += 2;
		}
		else if(match(argv[i], "--display-token-stage"))
		{
			/* Handled by prechecks */
			i += 2;
		}
		else if(match(argv[i], "--no-includes"))
		{
			follow_includes = FALSE;
			i+= 1;
		}
		else if(match(argv[i], "-A") || match(argv[i], "--architecture"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-f") || match(argv[i], "--file"))
		{
			name = argv[i + 1];
			if(NULL == name)
			{
				fputs("did not receive a file name\n", stderr);
				exit(EXIT_FAILURE);
			}

			in = fopen(name, "r");
			if(NULL == in)
			{
				fputs("Unable to open for reading file: ", stderr);
				fputs(name, stderr);
				fputs("\n Aborting to avoid problems\n", stderr);
				exit(EXIT_FAILURE);
			}
			global_token = read_all_tokens(in, global_token, name);
			fclose(in);
			i += 2;
		}
		else if(match(argv[i], "-o") || match(argv[i], "--output"))
		{
			/* handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "--max-string"))
		{
			/* handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-I") || match(argv[i], "--include-library-directory"))
		{
			/* Handled by precheck */
			i += 2;
		}
		else if(match(argv[i], "-h") || match(argv[i], "--help"))
		{
			fputs(
			      " flag and option                  Description                                   short  \n"
			      " -------------------------------- --------------------------------------------- -------\n"
			      " --architecture arch              what architecture to expand                   -A arch\n"
			      " --c-preprocess                   Perform C preprocess rules                           \n"
			      " --display-token-stage ###        Stop at designated stage and dump                    \n"
			      " --fuzz                           Turn on fuzzing protections                          \n"
			      " --help                           Display this help message                     -h     \n"
			      " --include-library-directory PATH Set library <file> location to PATH           -I path\n"
			      " --max-string ###                 Set maximum number of bytes in a string              \n"
			      " --no-includes                    Disable the expanding of #includes                   \n"
			      " --version                        Display program version and exit              -V     \n"
			      " --file file                      Contents are concatenated from left to right  -f file\n"
			      " --output file                    Where to put generated output                 -o file\n"
			      , stdout);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[i], "-V") || match(argv[i], "--version"))
		{
			fputs("M3-Preprocessor: ", stderr);
			/* use -D VERSION=\"$(git describe --dirty)\" to set correctly*/
			fputs(VERSION, stderr);
			fputc('\n', stderr);
			exit(EXIT_SUCCESS);
		}
		else if(match(argv[i], "--fuzz"))
		{
			/* Set fuzzing */
			FUZZING = TRUE;
			i += 1;
		}
		else if(match(argv[i], "--c-preprocess"))
		{
			/* strip things down */
			C_PREPROCESS = TRUE;
			i += 1;
		}
		else
		{
			fputs("UNKNOWN ARGUMENT\n", stdout);
			exit(EXIT_FAILURE);
		}
	}

	if(1 == DUMP_STAGE)
	{
		char* hold;
		fputs("{token,\n file name,\n line number,\n}\n", destination_file);
		/* NOTE(review): read_all_tokens returns the last token it created and
		 * every next link points at the token created before it, so this walk
		 * goes from the newest token to the oldest -- confirm whether the
		 * list should be put through reverse_list first */
		while(NULL != global_token)
		{
			/* global_token cannot be NULL inside the loop, only its fields can */
			fputs("{ \"", destination_file);
			if(NULL == global_token->s) fputs(":NULL:", destination_file);
			else if(match("\n", global_token->s)) fputs("\\n", destination_file);
			else fputs(global_token->s, destination_file);

			fputs("\",\n ", destination_file);
			if(NULL == global_token->filename) fputs(":::NULL:::", destination_file);
			else fputs(global_token->filename, destination_file);

			fputs(",\n ", destination_file);
			hold = int2str(global_token->linenumber, 10, FALSE);
			if(NULL == hold) fputs("::::NULL::::", destination_file);
			else fputs(hold, destination_file);

			fputs("\n }\n", destination_file);
			global_token = global_token->next;
		}
	}

	return EXIT_SUCCESS;
}