~sircmpwn/annotatec

d44b6a67db2a46ef5bb808eac8401df415371433 — Drew DeVault 7 months ago d06735f
Complete graph construction
7 files changed, 191 insertions(+), 77 deletions(-)

M Makefile
A graph.c
M graph.h
M lang.l
M lang.y
M main.c
M parse.h
M Makefile => Makefile +2 -1
@@ 7,7 7,8 @@ CFLAGS+=-g -Wall -Wextra -Wpedantic -Werror \
OBJECTS:=\
	lang.yy.o \
	lang.tab.o \
	main.o
	main.o \
	graph.o

lang.yy.c: lang.l lang.tab.h
	lex -t $< > $@

A graph.c => graph.c +120 -0
@@ 0,0 1,120 @@
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "graph.h"

/*
 * Hashes a NUL-terminated string with the djb2 scheme: start from 5381 and,
 * for every character, multiply the running hash by 33 and add the character.
 * Arithmetic is done on unsigned int, so overflow wraps around.
 */
static unsigned int djb2(const char *str) {
	unsigned int hash = 5381;
	for (const char *cursor = str; *cursor != '\0'; ++cursor) {
		/* hash * 33 is the same value as (hash << 5) + hash */
		hash = hash * 33 + *cursor;
	}
	return hash;
}

static struct symbol_definition *get_or_cache_symbol(
		struct project_graph *graph, struct source_file *file,
		char *name, bool is_static, int lineno, int colno) {
	struct symbol_definition *symbol;
	/* Index symbol */
	unsigned int hash = djb2(name);
	size_t len = sizeof(graph->cache.buckets) / sizeof(graph->cache.buckets[0]);
	size_t index = hash % len;
	struct symtab_entry *previous = NULL, *bucket = graph->cache.buckets[index];
	while (bucket) {
		symbol = bucket->symbol;
		if (bucket->hash == hash && strcmp(symbol->name, name) == 0) {
			if (((symbol->is_static || is_static) && file == symbol->file) ||
					(symbol->file && file)) {
				/* Duplicate symbols */
				fprintf(stderr, "\nWarning: duplicate symbol %s "
						"(in both %s and %s; using former as canonical)", name,
						symbol->file->path, file->path);
				return symbol;
			} else if (!symbol->file && !file) {
				/* Repeated orphan reference */
				return symbol;
			} else if (!symbol->file && file && !is_static) {
				/* Adopt orphaned symbol */
				symbol->file = file;
				symbol->lineno = lineno;
				symbol->colno = colno;
				symbol->is_static = is_static;
				if (symbol->prev) {
					symbol->prev->next = symbol->next;
				}
				++file->nsyms;
				symbol->next = file->symbols;
				file->symbols = symbol;
				return symbol;
			} else if (symbol->file && !file) {
				return symbol;
			}
		}
		previous = bucket;
		bucket = bucket->next;
	}

	symbol = calloc(sizeof(struct symbol_definition), 1);
	symbol->name = name;
	symbol->lineno = lineno;
	symbol->colno = colno;
	symbol->is_static = is_static;
	symbol->file = file;
	if (file) {
		++file->nsyms;
		if (file->symbols) {
			file->symbols->prev = symbol;
		}
		symbol->next = file->symbols;
		file->symbols = symbol;
	} else {
		++graph->norphans;
		if (graph->orphans) {
			graph->orphans->prev = symbol;
		}
		symbol->next = graph->orphans;
		graph->orphans = symbol;
	}

	bucket = calloc(1, sizeof(struct symtab_entry));
	bucket->hash = hash;
	bucket->symbol = symbol;
	if (previous == NULL) {
		graph->cache.buckets[index] = bucket;
	} else {
		previous->next = bucket;
	}
	return symbol;
}

/*
 * Registers a definition of `name` located at lineno:colno in `file`.
 * All of the work is delegated to get_or_cache_symbol(); the symbol it
 * returns is handed back to the caller unchanged.
 */
struct symbol_definition *define_symbol(
		struct project_graph *graph, struct source_file *file,
		char *name, bool is_static, int lineno, int colno) {
	struct symbol_definition *definition =
		get_or_cache_symbol(graph, file, name, is_static, lineno, colno);
	return definition;
}

/*
 * Records a use of the symbol `name` at lineno:colno inside `file`.
 *
 * The symbol is looked up through get_or_cache_symbol() with no defining
 * file, so a name that has not been defined yet is tracked as an orphan. The
 * new reference is pushed onto the front of two singly linked lists: the
 * symbol's list (via snext) and the file's list (via fnext), and the file's
 * reference count is incremented.
 *
 * `file` must not be NULL (asserted). Returns the new reference. Exits the
 * process if memory cannot be allocated.
 */
struct symbol_reference *reference_symbol(
		struct project_graph *graph, struct source_file *file,
		char *name, int lineno, int colno) {
	assert(file);
	struct symbol_reference *reference = calloc(1, sizeof(*reference));
	if (!reference) {
		/* Fail loudly rather than dereferencing NULL below */
		fprintf(stderr, "\nFatal: out of memory\n");
		exit(EXIT_FAILURE);
	}
	reference->lineno = lineno;
	reference->colno = colno;

	/* A reference carries no definition info: no file, not static, no position */
	struct symbol_definition *symbol =
		get_or_cache_symbol(graph, NULL, name, false, 0, 0);

	/* Link into the symbol's reference list */
	reference->symbol = symbol;
	reference->snext = symbol->references;
	symbol->references = reference;

	/* Link into the referencing file's list */
	reference->file = file;
	reference->fnext = file->references;
	file->references = reference;

	++file->nrefs;
	return reference;
}

M graph.h => graph.h +19 -2
@@ 18,8 18,12 @@ struct typedef_declaration {
	int lineno, colno;
};

struct symbol_reference;
struct source_file;

struct symbol_definition {
	struct symbol_definition *next;
	struct symbol_definition *next, *prev;
	struct symbol_reference *references;
	struct source_file *file;
	char *name;
	int lineno, colno;


@@ 28,8 32,10 @@ struct symbol_definition {
};

struct symbol_reference {
	struct symbol_reference *next;
	/* Linked via both files and definitions */
	struct symbol_reference *snext, *fnext;
	struct symbol_definition *symbol;
	struct source_file *file;
	int lineno, colno;
};



@@ 46,6 52,17 @@ struct project_graph {
	struct source_file *files;
	struct symbol_definition *orphans;
	struct symbol_cache cache;
	int norphans;
};

struct symbol_definition *define_symbol(
		struct project_graph *graph, struct source_file *file,
		char *name, bool is_static, int lineno, int colno);

struct symbol_reference *reference_symbol(
		struct project_graph *graph, struct source_file *file,
		char *name, int lineno, int colno);

extern struct project_graph graph;

#endif

M lang.l => lang.l +1 -1
@@ 41,7 41,7 @@ struct source_file *lexreset(const char *fn, FILE *input) {
	original_filename = fn;
	filename = strdup(original_filename);
	current_source = calloc(1, sizeof(struct source_file));
	current_source->path = filename;
	current_source->path = strdup(fn);
	yyrestart(input);
	return current_source;
}

M lang.y => lang.y +8 -35
@@ 11,8 11,6 @@ int yylex(void);

extern int lineno, colno;
extern char *filename, *original_filename;

struct parse_state parse_state = { 0 };
%}

%union {


@@ 135,22 133,14 @@ postfix_expression
	| postfix_expression '[' expression ']'
	| postfix_expression '(' ')' {
		if ($1.name && strcmp(filename, original_filename) == 0) {
			struct funccall *call = calloc(1, sizeof(struct funccall));
			call->strloc.name = $1.name;
			call->strloc.lineno = $1.lineno;
			call->strloc.colno = $1.colno;
			call->next = parse_state.funccalls;
			parse_state.funccalls = call;
			reference_symbol(&graph, current_source,
				$1.name, $1.lineno, $1.colno);
		}
	}
	| postfix_expression '(' argument_expression_list ')' {
		if ($$.name && strcmp(filename, original_filename) == 0) {
			struct funccall *call = calloc(1, sizeof(struct funccall));
			call->strloc.name = $1.name;
			call->strloc.lineno = $1.lineno;
			call->strloc.colno = $1.colno;
			call->next = parse_state.funccalls;
			parse_state.funccalls = call;
			reference_symbol(&graph, current_source,
				$1.name, $1.lineno, $1.colno);
		}
	}
	| postfix_expression '.' CZ_IDENTIFIER


@@ 664,31 654,14 @@ external_declaration
function_definition
	: declaration_specifiers declarator declaration_list compound_statement {
		if (!filename || strcmp(filename, original_filename) == 0) {
			/* TODO: Add to symbol_cache for quick lookup */
			struct symbol_definition *symbol =
				calloc(sizeof(struct symbol_definition), 1);
			symbol->file = current_source;
			symbol->name = $2.name;
			symbol->lineno = $2.lineno;
			symbol->colno = $2.colno;
			symbol->is_static = $1 == CZ_STATIC;
			symbol->next = current_source->symbols;
			current_source->symbols = symbol;
			++current_source->nsyms;
			define_symbol(&graph, current_source, $2.name,
				$1 == CZ_STATIC, $2.lineno, $2.colno);
		}
	}
	| declaration_specifiers declarator compound_statement {
		if (!filename || strcmp(filename, original_filename) == 0) {
			struct symbol_definition *symbol =
				calloc(sizeof(struct symbol_definition), 1);
			symbol->file = current_source;
			symbol->name = $2.name;
			symbol->lineno = $2.lineno;
			symbol->colno = $2.colno;
			symbol->is_static = $1 == CZ_STATIC;
			symbol->next = current_source->symbols;
			current_source->symbols = symbol;
			++current_source->nsyms;
			define_symbol(&graph, current_source, $2.name,
				$1 == CZ_STATIC, $2.lineno, $2.colno);
		}
	}
	;

M main.c => main.c +41 -27
@@ 10,39 10,42 @@ struct project_graph graph = { 0 };
/*
 * Formats "<cppcmd> <input>" into a fixed 4096-byte buffer and runs it with
 * popen() in read mode. Returns the stream from popen(), which is not checked
 * for NULL here.
 *
 * NOTE(review): this listing appears to show both the old and the new fprintf
 * line of the change; presumably only the "Fatal:" variant remains in the
 * file -- confirm against the actual source.
 */
static FILE *invoke_cpp(const char *cppcmd, const char *input) {
	char cmd[4096];
	/*
	 * NOTE(review): snprintf returns the length the full string would have
	 * had, so truncation is signalled by a result >= sizeof(cmd); comparing
	 * with == only catches the case where it is exactly equal.
	 */
	if (snprintf(cmd, sizeof(cmd), "%s %s", cppcmd, input) == sizeof(cmd)) {
		fprintf(stderr, "cpp command larger than internal buffer\n");
		fprintf(stderr, "\nFatal: cpp command larger than internal buffer\n");
		exit(1);
	}
	return popen(cmd, "r");
}

static void postprocess(struct source_file *file) {
	fprintf(stderr, "%3d symbols %3d references\n", file->nsyms, file->nrefs);
	struct symbol_definition *symbol = file->symbols;
static void emit_annotations(struct source_file *file) {
	printf("%s: %3d symbols %3d references %3d orphans\n",
			file->path, file->nsyms, file->nrefs, graph.norphans);

	struct symbol_reference *ref;
	struct symbol_definition *symbol;

	symbol = file->symbols;
	while (symbol) {
		// TODO: Index symbols and adopt orphans
		printf("\tdefines: %s@%d:%d (static: %d)\n",
				symbol->name, symbol->lineno, symbol->colno, symbol->is_static);
		ref = symbol->references;
		while (ref) {
			printf("\t\treference: %s:%d:%d\n",
					ref->file->path, ref->lineno, ref->colno);
			ref = ref->snext;
		}
		symbol = symbol->next;
	}

	struct funccall *call = parse_state.funccalls;
	while (call) {
		/* TODO: stow away */
		struct funccall *next = call->next;
		free(call->strloc.name);
		free(call);
		call = next;
	}

	struct typedef_declaration *td = file->typedefs;
	while (td) {
		/* Note: we don't track the usage of these, who cares tbh */
		struct typedef_declaration *next = td->next;
		free(td->name);
		free(td);
		td = next;
	ref = file->references;
	while (ref) {
		symbol = ref->symbol;
		if (symbol->file) {
			printf("\treferences: %s@%d:%d via %s:%d:%d\n",
					symbol->name, ref->lineno, ref->colno,
					symbol->file->path, symbol->lineno, symbol->colno);
		}
		ref = ref->fnext;
	}

	memset(&parse_state, 0, sizeof(parse_state));
}

extern int yydebug;


@@ 66,19 69,30 @@ int main(int argc, char **argv) {
		}
	}

	int nsyms = 0, nrefs = 0;
	for (int i = optind; i < argc; ++i) {
		const char *path = argv[i];
		// TODO: Put debug prints behind a flag
		fprintf(stderr, "Scanning %d/%-6d %-20s ",
				i - optind + 1, argc - optind, path);
		FILE *cpp = invoke_cpp(cppcmd, path);
		struct source_file *file = lexreset(path, cpp);
		fprintf(stderr, "Scanning %3d/%d %-25s ",
				i - optind + 1, argc - optind, path);
		yyparse();
		pclose(cpp);
		postprocess(file);
		fprintf(stderr, "%3d symbols %5d references\n",
				file->nsyms, file->nrefs);
		nsyms += file->nsyms, nrefs += file->nrefs;
		file->next = graph.files;
		graph.files = file;
	}
	fprintf(stderr, "Scan complete: "
			"%d files, %d symbols, %d references, %d orphan refs\n",
			argc - optind, nsyms, nrefs, graph.norphans);

	struct source_file *file = graph.files;
	while (file) {
		emit_annotations(file);
		file = file->next;
	}

	// TODO: free state


M parse.h => parse.h +0 -11
@@ 17,17 17,6 @@ struct strloc_list {
	struct strloc_list *next;
};

/* TODO: replace with symbol_reference */
struct funccall {
	struct strloc strloc;
	struct funccall *next;
};

struct parse_state {
	struct funccall *funccalls;
};

extern struct parse_state parse_state;
extern struct source_file *current_source;

#endif