~oriansj/M3-Preprocess

e0accc2ab1aa3fca3931b291a95ba715806e09bb — Jeremiah Orians 2 years ago d5be987
Starting to look like a real token stream
6 files changed, 178 insertions(+), 72 deletions(-)

A c_stage1.c
M cc.h
M cc_globals.c
M cc_globals.h
M cc_reader.c
M cpp.c
A c_stage1.c => c_stage1.c +118 -0
@@ 0,0 1,118 @@
/* Copyright (C) 2016, 2021 Jeremiah Orians
 * Copyright (C) 2020 deesix <deesix@tuta.io>
 * Copyright (C) 2020 Gabriel Wicki
 * This file is part of M2-Planet.
 *
 * M2-Planet is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M2-Planet is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "cc.h"
#include "cc_globals.h"

int strtoint(char *a);
struct token_list* new_token(char* s, int size, char* filename, int linenum, struct token_list* prev, struct token_list* next);
struct token_list* reverse_list(struct token_list* head);
void dump_stage_raw();
void dump_stage_straight();

/* Allocate a single node of a pattern list.
 *
 * s    - token text this node stands for; the pointer is stored as-is,
 *        the string is not copied
 * next - remainder of the pattern, or NULL if this is the last node
 *
 * Returns the freshly allocated node.  If the allocation fails a message
 * is written to stderr and the program exits with EXIT_FAILURE.
 */
struct pattern* new_pattern(char* s, struct pattern* next)
{
	struct pattern* i = calloc(1, sizeof(struct pattern));
	if(NULL == i)
	{
		/* Stop here instead of writing through a NULL pointer below */
		fputs("Exhausted memory while creating a pattern\n", stderr);
		exit(EXIT_FAILURE);
	}
	i->s = s;
	i->next = next;
	return i;
}

/* Check whether the tokens beginning at i spell out the pattern p and,
 * when they do, collapse the matched run down to its first token.
 *
 * i - first token to compare
 * p - pattern list; an empty (NULL) pattern always matches
 *
 * Returns TRUE on a match and FALSE otherwise.  Nothing is modified on
 * FALSE.  On TRUE every matched token after i has been unlinked and its
 * node handed to free(); i->s is left untouched, so the caller decides
 * what text the surviving token carries.
 */
int pattern_compress(struct token_list* i, struct pattern* p)
{
	struct token_list* cursor = i;
	struct pattern* want = p;
	struct token_list* hold;
	int matched = 0;

	/* Pass one: every pattern entry needs a token with identical text */
	while(NULL != want)
	{
		if(NULL == cursor) return FALSE;
		if(!match(want->s, cursor->s)) return FALSE;
		matched = matched + 1;
		cursor = cursor->next;
		want = want->next;
	}

	/* Pass two: drop all matched tokens except the first one */
	while(matched > 1)
	{
		hold = i->next;
		i->next = hold->next;
		free(hold);
		matched = matched - 1;
	}

	return TRUE;
}

void combine_common()
{
	struct token_list* i = global_token;

	struct pattern* decrement = new_pattern("-", new_pattern("-", NULL));
	struct pattern* increment = new_pattern("+", new_pattern("+", NULL));
	struct pattern* addassign = new_pattern("+", new_pattern("=", NULL));
	struct pattern* subassign = new_pattern("-", new_pattern("=", NULL));
	struct pattern* mulassign = new_pattern("*", new_pattern("=", NULL));
	struct pattern* divassign = new_pattern("/", new_pattern("=", NULL));
	struct pattern* modassign = new_pattern("%", new_pattern("=", NULL));
	struct pattern* equality = new_pattern("=", new_pattern("=", NULL));
	struct pattern* inequality = new_pattern("!", new_pattern("=", NULL));
	struct pattern* GTE = new_pattern(">", new_pattern("=", NULL));
	struct pattern* LTE = new_pattern("<", new_pattern("=", NULL));
	struct pattern* logicaland = new_pattern("&", new_pattern("&", NULL));
	struct pattern* logicalor = new_pattern("|", new_pattern("|", NULL));
	struct pattern* shiftleft = new_pattern("<", new_pattern("<", NULL));
	struct pattern* shiftright = new_pattern(">", new_pattern(">", NULL));
	struct pattern* arrow = new_pattern("-", new_pattern(">", NULL));
	struct pattern* andassign = new_pattern("&", new_pattern("=", NULL));
	struct pattern* orassign = new_pattern("|", new_pattern("=", NULL));
	struct pattern* xorassign = new_pattern("^", new_pattern("=", NULL));
	struct pattern* leftassign = new_pattern("<", new_pattern("<", new_pattern("=", NULL)));
	struct pattern* rightassign = new_pattern(">", new_pattern(">", new_pattern("=", NULL)));

	while(NULL != i)
	{
		if(NULL == i->next) break;

		if(pattern_compress(i, leftassign)) i->s = "<<=";
		else if(pattern_compress(i, rightassign)) i->s = ">>=";
		else if(pattern_compress(i, decrement)) i->s = "--";
		else if(pattern_compress(i, increment)) i->s = "++";
		else if(pattern_compress(i, addassign)) i->s = "+=";
		else if(pattern_compress(i, subassign)) i->s = "-=";
		else if(pattern_compress(i, mulassign)) i->s = "*=";
		else if(pattern_compress(i, divassign)) i->s = "/=";
		else if(pattern_compress(i, modassign)) i->s = "%=";
		else if(pattern_compress(i, equality)) i->s = "==";
		else if(pattern_compress(i, inequality)) i->s = "!=";
		else if(pattern_compress(i, GTE)) i->s = ">=";
		else if(pattern_compress(i, LTE)) i->s = "<=";
		else if(pattern_compress(i, andassign)) i->s = "&=";
		else if(pattern_compress(i, orassign)) i->s = "|=";
		else if(pattern_compress(i, xorassign)) i->s = "^=";
		else if(pattern_compress(i, logicaland)) i->s = "&&";
		else if(pattern_compress(i, logicalor)) i->s = "||";
		else if(pattern_compress(i, shiftleft)) i->s = "<<";
		else if(pattern_compress(i, shiftright)) i->s = ">>";
		else if(pattern_compress(i, arrow)) i->s = "->";
		else i = i->next;
	}

}

/* Drive the first preprocessing stage over the list held in global_token.
 *
 * DUMP_STAGE selects at most one point at which the list is written out:
 *   1     - dump_stage_raw before anything is touched
 *   2 / 3 - dump_stage_raw / dump_stage_straight after reverse_list
 *   4 / 5 - dump_stage_raw / dump_stage_straight after combine_common
 */
void stage1_preprocess()
{
	if(DUMP_STAGE == 1)
	{
		dump_stage_raw();
	}

	/* reverse_list hands back the head that global_token must now use */
	global_token = reverse_list(global_token);

	if(DUMP_STAGE == 2)
	{
		dump_stage_raw();
	}
	if(DUMP_STAGE == 3)
	{
		dump_stage_straight();
	}

	/* Merge adjacent operator tokens into multi-character operators */
	combine_common();

	if(DUMP_STAGE == 4)
	{
		dump_stage_raw();
	}
	if(DUMP_STAGE == 5)
	{
		dump_stage_straight();
	}
}

M cc.h => cc.h +9 -0
@@ 20,6 20,8 @@
#include <stdio.h>
#include <string.h>

#ifndef _CC_H
#define _CC_H

#define FALSE 0
#define TRUE 1


@@ 67,4 69,11 @@ struct token_list
	};
};

/* One node of a singly linked list describing a sequence of token
 * strings to look for in the token stream.
 */
struct pattern
{
	/* Text that the corresponding token's s field is compared against */
	char* s;
	/* Next node of the sequence; NULL marks the end of the pattern */
	struct pattern* next;
};

#include "cc_globals.h"
#endif

M cc_globals.c => cc_globals.c +8 -3
@@ 16,6 16,8 @@
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>

/* What types we have */
struct type* global_types;
struct type* prim_types;


@@ 30,9 32,9 @@ struct token_list* globals_list;

/* Make our string collection more efficient */
char* hold_string;
int string_index;
size_t string_index;

int MAX_STRING;
size_t MAX_STRING;

/* enable preprocessor-only mode */
int PREPROCESSOR_MODE;


@@ 44,7 46,10 @@ int WORDSIZE;
int ENDIAN;
char* BASEADDRESS;
int STDIO_USED;
char* TEMPDIR;

/* How we deal with output */
char* DESTINATION_NAME;
FILE* DESTINATION_FILE;

/* So we don't shoot ourself in the face */
int FUZZING;

M cc_globals.h => cc_globals.h +12 -3
@@ 16,6 16,10 @@
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>

#ifndef _CC_GLOBALS_H
#define _CC_GLOBALS_H
/* What types we have */
extern struct type* global_types;
extern struct type* prim_types;


@@ 30,10 34,10 @@ extern struct token_list* globals_list;

/* Make our string collection more efficient */
extern char* hold_string;
extern int string_index;
extern size_t string_index;

/* Allow us to have a single settable max string */
extern long MAX_STRING;
extern size_t MAX_STRING;

/* enable preprocessor-only mode */
extern int PREPROCESSOR_MODE;


@@ 45,10 49,15 @@ extern int WORDSIZE;
extern int ENDIAN;
extern char* BASEADDRESS;
extern int STDIO_USED;
extern char* TEMPDIR;

/* How we deal with output */
extern char* DESTINATION_NAME;
extern FILE* DESTINATION_FILE;

/* So we don't shoot ourself in the face */
extern int FUZZING;
extern int DIRTY_MODE;
extern int DUMP_STAGE;
extern int DEBUG_LEVEL;

#endif

M cc_reader.c => cc_reader.c +13 -23
@@ 85,7 85,7 @@ int preserve_keyword(int c, char* S)

void clear_string(char* s)
{
	int i = 0;
	size_t i = 0;
	while(0 != s[i])
	{
		s[i] = 0;


@@ 118,8 118,7 @@ struct token_list* eat_token(struct token_list* token)
	return token->next;
}


void new_token(char* s, int size)
struct token_list* new_token(char* s, int size, char* filename, int linenum, struct token_list* prev, struct token_list* next)
{
	struct token_list* current = calloc(1, sizeof(struct token_list));
	require(NULL != current, "Exhausted memory while getting token\n");


@@ 129,11 128,11 @@ void new_token(char* s, int size)
	require(NULL != current->s, "Exhausted memory while trying to copy a token\n");
	copy_string(current->s, s, MAX_STRING);

	current->prev = token;
	current->next = token;
	current->linenumber = line;
	current->filename = file;
	token = current;
	current->prev = prev;
	current->next = next;
	current->linenumber = linenum;
	current->filename = filename;
	return current;
}

int get_token(int c)


@@ 176,7 175,7 @@ int get_token(int c)
			c = consume_byte(c);
		}
	}
	else if(in_set(c, " \a\b\t\n\v\f\r<=>|&!^%*+-"))
	else if(in_set(c, " \a\b\t\n\v\f\r<=>|&!^%*+-:;"))
	{
		c = consume_byte(c);
	}


@@ 187,20 186,12 @@ int get_token(int c)
	}
	else if(in_set(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"))
	{
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_:");
		c = preserve_keyword(c, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
	}
	else if(in_set(c, "'\""))
	{
		c = preserve_string(c);
	}
	else if(c == '*')
	{
		c = consume_byte(c);
		if(c == '=')
		{
			c = consume_byte(c);
		}
	}
	else
	{
		c = consume_byte(c);


@@ 231,23 222,22 @@ void insert_file_header(char* name, int line)
	strcat(hold_string, name);
	strcat(hold_string, " ");
	strcat(hold_string, hold_line);
	new_token(hold_string, strlen(hold_string)+2);
	new_token("\n", 3);
	token = new_token(hold_string, strlen(hold_string)+2, name, line, token, token);
	token = new_token("\n", 3, name, line, token, token);
}

struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename)
{
	token = current;
	input  = a;
	line = 0;
	line = 1;
	file = filename;
	insert_file_header(filename, 1);
	line = 1;
	int ch = grab_byte();
	while(EOF != ch)
	{
		ch = get_token(ch);
		new_token(hold_string, string_index + 2);
		token = new_token(hold_string, string_index + 2, file, line, token, token);
	}

	return token;

M cpp.c => cpp.c +18 -43
@@ 17,15 17,13 @@
 * along with M2-Planet.  If not, see <http://www.gnu.org/licenses/>.
 */

#include"cc.h"
#include <unistd.h>
#include "cc.h"
#include "cc_globals.h"

int strtoint(char *a);
struct token_list* read_all_tokens(FILE* a, struct token_list* current, char* filename);

char* destination_name;
FILE* destination_file;

void dump_stage_straight();
void stage1_preprocess();

void prechecks(int argc, char** argv)
{


@@ 109,10 107,10 @@ void prechecks(int argc, char** argv)
		}
		else if(match(argv[i], "-o") || match(argv[i], "--output"))
		{
			destination_name = argv[i + 1];
			require(NULL != destination_name, "--output option requires a filename to follow\n");
			destination_file = fopen(destination_name, "w");
			if(NULL == destination_file)
			DESTINATION_NAME = argv[i + 1];
			require(NULL != DESTINATION_NAME, "--output option requires a filename to follow\n");
			DESTINATION_FILE = fopen(DESTINATION_NAME, "w");
			if(NULL == DESTINATION_FILE)
			{
				fputs("Unable to open for writing file: ", stderr);
				fputs(argv[i + 1], stderr);


@@ 142,8 140,8 @@ int main(int argc, char** argv)
	STDIO_USED = FALSE;
	DIRTY_MODE = FALSE;
	Architecture = NULL;
	destination_name = "/dev/stdout";
	destination_file = stdout;
	DESTINATION_NAME = "/dev/stdout";
	DESTINATION_FILE = stdout;
	global_token = NULL;

	/* Our fun locals */


@@ 157,12 155,12 @@ int main(int argc, char** argv)

	if(0 == DUMP_STAGE)
	{
		fputs("{VARIABLE, PURPOSE, VALUE}\n", destination_file);
		fputs("{DESTINATION_FILE, The target location of where to put the output, ", destination_file);
		fputs(destination_name, destination_file);
		fputs("}\nARCHITECTURE, The architectures for which to expand in #IFDEF blobcks, ", destination_file);
		fputs(Architecture, destination_file);
		fputs("}\n", destination_file);
		fputs("{VARIABLE, PURPOSE, VALUE}\n", DESTINATION_FILE);
		fputs("{DESTINATION_FILE, The target location of where to put the output, ", DESTINATION_FILE);
		fputs(DESTINATION_NAME, DESTINATION_FILE);
		fputs("}\nARCHITECTURE, The architectures for which to expand in #IFDEF blobcks, ", DESTINATION_FILE);
		fputs(Architecture, DESTINATION_FILE);
		fputs("}\n", DESTINATION_FILE);
		exit(EXIT_SUCCESS);
	}



@@ 274,30 272,7 @@ int main(int argc, char** argv)
		}
	}

	if(1 == DUMP_STAGE)
	{
		char* hold;
		fputs("{token,\n file name,\n line number,\n}\n", destination_file);
		while(NULL != global_token)
		{
			fputs("{ \"", destination_file);
			if(NULL == global_token) fputs("NULL", destination_file);
			else if(NULL == global_token->s) fputs(":NULL:", destination_file);
			else if(match("\n", global_token->s)) fputs("\\n", destination_file);
			else fputs(global_token->s, destination_file);

			fputs("\",\n ", destination_file);
			if(NULL == global_token) fputs("::NULL::", destination_file);
			else if(NULL == global_token->filename) fputs(":::NULL:::", destination_file);
			else fputs(global_token->filename, destination_file);

			fputs(",\n ", destination_file);
			hold = int2str(global_token->linenumber, 10, FALSE);
			if(NULL == hold) fputs("::::NULL::::", destination_file);
			else fputs(hold, destination_file);
	if(C_PREPROCESS) stage1_preprocess();

			fputs("\n }\n", destination_file);
			global_token = global_token->next;
		}
	}
	dump_stage_straight();
}