~kdsch/uc

02a87a3e1d9d93777a10d3ff6d1ea8fd25d09bf4 — Karl Schultheisz 2 years ago
First commit
13 files changed, 662 insertions(+), 0 deletions(-)

A Makefile
A README.md
A frame.h
A line.c
A line.h
A node.c
A node.h
A test/args.txt
A test/crasher.c
A trace_iter.h
A tracer.c
A tracer.h
A uc.c
A  => Makefile +13 -0
@@ 1,13 @@
# Strict, warnings-as-errors C11 build with debug info; _POSIX_C_SOURCE makes
# the POSIX interfaces used by the sources visible.
CFLAGS = -Wall -Werror -Wconversion -Wextra -Wno-aggressive-loop-optimizations -std=c11 -pedantic -D_POSIX_C_SOURCE=200809L -g -fmax-errors=3
# libunwind (including its ptrace support) and liblzma.
LDLIBS = -lunwind-ptrace -lunwind-generic -llzma -lunwind
# Link the executables statically.
LDFLAGS = -static

# Build uc and the crashing fixture, then run uc under valgrind with the
# fixture's arguments supplied on stdin.
check: uc test/crasher
	valgrind -q ./$< <test/args.txt

# Extra objects linked into uc; no recipe is given, so make's built-in rules
# do the compiling and linking.
uc: line.o node.o tracer.o

# Remove the build products.
clean:
	rm -f test/crasher uc *.o

.PHONY: check clean

A  => README.md +28 -0
@@ 1,28 @@
# uc - print unique crashes from a fuzz corpus

Fuzzers generate thousands of files, hundreds of which can be
redundant. uc pares them down.

Example usage, in the case of an AFL++ parallel run:

	for f in findings/*/crashes/id*; do
		printf '%s %s\n' "$TARGET" "$f"
	done | ./uc

Output:

```
<location such as "ip=0x435233 offset=0x4 sym=fscanf" or "src/parse.c:2453">
	findings/0/crashes/id...
	findings/2/crashes/id...
	findings/0/crashes/id...
	findings/0/crashes/id...
	findings/3/crashes/id...

<location of another crash>
	findings/0/crashes/id...
	findings/0/crashes/id...
	findings/1/crashes/id...

<...>
```

A  => frame.h +8 -0
@@ 1,8 @@
#ifndef FRAME_H
#define FRAME_H
#include <stdint.h>
// One stack frame of a traced process.
struct frame {
	// Instruction pointer of the frame.
	uintptr_t ip;
	// Offset reported together with the procedure name.
	uintptr_t offset;
	// Name of the procedure the frame belongs to.
	char symbol[128];
};
#endif

A  => line.c +22 -0
@@ 1,22 @@
#include "line.h"
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read the next line of f into a freshly allocated buffer.
 *
 * On success the buffer is stored in *line with its newline (if any)
 * replaced by a terminator, and 1 is returned; the caller owns the buffer
 * and releases it with free(). At end of input or on a read error, *line is
 * set to NULL and 0 is returned.
 */
int
line_next(FILE *f, char **line)
{
	*line = NULL;
	size_t cap = 0;

	// Success is judged by getline's return value alone: errno may still
	// hold a value left behind by an unrelated earlier call.
	if (getline(line, &cap, f) == -1) {
		// getline may allocate even when it fails; release that buffer so
		// nothing leaks and the caller never sees a half-valid pointer.
		free(*line);
		*line = NULL;
		return 0;
	}

	// TODO: split up *line into space-delimited words

	char *nl = strchr(*line, '\n');

	if (nl) {
		*nl = '\0';
	}

	return 1;
}


A  => line.h +5 -0
@@ 1,5 @@
#ifndef LINE_H
#define LINE_H
#include <stdio.h>
// Reads the next line of f into a heap buffer stored in *line, with the
// newline replaced by a terminator. Returns nonzero when a line was read and
// zero otherwise (end of input or error).
int line_next(FILE *f, char **line);
#endif

A  => node.c +176 -0
@@ 1,176 @@
#include "node.h"

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/*

     root
    /    \
   f      g    ← top of stack
   |     / \
   h    i  e
   |    |  |   }- entry points
   e    e  C
   |    |      }- crash data
   A    B

      root: dynamic array of frames
      f, g: frames in which the program crashed (top of stack)
      h, i: frames that were on the stack during a crash
         e: program entry point (main)
   A, B, C: crash data (argv, signal)

*/

// Answer given by a visitor callback for the node it was just shown.
enum visitor_msg {
	VISITOR_DONE = 0,     // stop here: this node's children are not visited
	VISITOR_CONTINUE = 1, // go on into this node's children
};

// Callback plus its context, applied to nodes by node_visit.
struct node_visitor {
	// Opaque context handed to visit on every call.
	void *data;
	// Called once per visited node; the result tells the walk whether to
	// descend into that node's children.
	enum visitor_msg (*visit)(void *data, struct node *n);
};

struct node *
node_new_root(void)
{
	struct node *n = malloc(sizeof(struct node));
	memset(n, 0, sizeof(struct node));
	n->type = NODE_ROOT;
	return n;
}

static struct node *
node_new_frame(const struct frame *f)
{
	struct node *n = malloc(sizeof(struct node));
	memset(n, 0, sizeof(struct node));
	n->type = NODE_FRAME;
	n->frame = *f;
	return n;
}

static struct node *
node_new_crash(const struct crash *c)
{
	struct node *n = malloc(sizeof(struct node));
	memset(n, 0, sizeof(struct node));
	n->type = NODE_CRASH;
	n->crash = *c;
	return n;
}

/*
 * Double the capacity of a, or give it one slot if it has none.
 *
 * The stored elements and a->len are left untouched. A capacity that cannot
 * be represented, or a failed allocation, is reported and aborts the
 * process.
 */
static void
node_array_grow(struct node_array *a)
{
	size_t newcap = a->cap ? 2 * a->cap : 1;

	// Refuse capacities that wrapped around or whose byte size would.
	if (newcap < a->cap || newcap > SIZE_MAX / sizeof *a->nodes) {
		fputs("node_array_grow: capacity overflow\n", stderr);
		abort();
	}

	// Keep the old pointer until realloc has succeeded so it is never lost.
	struct node **grown = realloc(a->nodes, newcap * sizeof *grown);

	if (!grown) {
		perror("realloc");
		abort();
	}

	a->nodes = grown;
	a->cap = newcap;
}

/* Store n in the first free slot of a, enlarging a first when it is full. */
static void
node_array_append(struct node_array *a, struct node *n)
{
	if (a->len == a->cap) {
		node_array_grow(a);
	}

	a->nodes[a->len++] = n;
}

/*
 * Return nonzero when a and b have the same ip and the same offset.
 * The symbol name takes no part in the comparison.
 */
static int
frame_equal(const struct frame *a, const struct frame *b)
{
	if (a->ip != b->ip) {
		return 0;
	}

	return a->offset == b->offset;
}

/*
 * Return the first frame node in a whose frame equals *f, or NULL when
 * there is none. Nodes of other types are skipped.
 */
static struct node *
node_array_find(const struct node_array *a, const struct frame *f)
{
	size_t idx = 0;

	while (idx < a->len) {
		struct node *cand = a->nodes[idx++];

		if (cand->type != NODE_FRAME) {
			continue;
		}

		if (frame_equal(&cand->frame, f)) {
			return cand;
		}
	}

	return NULL;
}

void
node_add_trace(struct node *root, const struct trace_iter *t,
	const struct crash *c)
{
	struct node *cur = root;
	assert(cur->type != NODE_CRASH);
	enum trace_msg msg;

	for (struct frame f; (msg = t->next(t->data, &f)) == TRACE_CONTINUE;) {
		assert(cur->type != NODE_CRASH);
		struct node *n = node_array_find(&cur->callers, &f);

		if (!n) {
			n = node_new_frame(&f);
			node_array_append(&cur->callers, n);
		}

		cur = n;
	}

	assert(cur->type != NODE_CRASH);

	if (msg == TRACE_ERR) {
		// If the trace iterator terminates with an error, the stacktrace
		// is incomplete, which is no problem for node_crashes, because it
		// assumes not all leaves are crash nodes. Omitting the crash
		// node prevents node_crashes from gathering incorrect results.
		return;
	}

	node_array_append(&cur->callers, node_new_crash(c));
}

/*
 * Depth-first walk: show n to the visitor, then walk each child in order.
 * Nothing happens for a NULL n. The children are skipped when the visitor
 * answers VISITOR_DONE or when n is a crash node.
 */
static void
node_visit(struct node *n, struct node_visitor *v)
{
	if (!n) {
		return;
	}

	if (v->visit(v->data, n) == VISITOR_DONE) {
		return;
	}

	if (n->type == NODE_CRASH) {
		return;
	}

	size_t child = 0;

	while (child < n->callers.len) {
		node_visit(n->callers.nodes[child], v);
		child++;
	}
}

/*
 * Visitor that collects crash nodes: data is the node_array to append to.
 * Always asks the walk to continue.
 */
static enum visitor_msg
append_crashes(void *data, struct node *n)
{
	if (n->type != NODE_CRASH) {
		return VISITOR_CONTINUE;
	}

	struct node_array *out = data;
	node_array_append(out, n);
	return VISITOR_CONTINUE;
}

void
node_crashes(const struct node *root, const struct frame *f,
	struct node_array *crashes)
{
	assert(root->type == NODE_ROOT);
	struct node *n = node_array_find(&root->callers, f);

	struct node_visitor v = {
		.data = crashes,
		.visit = append_crashes,
	};

	node_visit(n, &v);
}


A  => node.h +37 -0
@@ 1,37 @@
#ifndef NODE_H
#define NODE_H
#include <stddef.h>
#include "trace_iter.h"

struct node;

// Growable array of node pointers.
struct node_array {
	// Number of slots allocated in nodes.
	size_t cap;
	// Number of slots currently in use.
	size_t len;
	// The slots themselves.
	struct node **nodes;
};

// What is remembered about one crashing run.
struct crash {
	// NULL-terminated argument vector of the command that crashed.
	const char **argv;
	// Number of the signal the process stopped on.
	int signal;
};

// Kind of a tree node; decides which payload of struct node is meaningful.
enum node_type {
	NODE_ROOT = 0,  // top of the tree, carries no payload
	NODE_FRAME = 1, // carries a stack frame
	NODE_CRASH = 2, // carries crash data
};

// One vertex of the crash tree.
struct node {
	// Selects the valid union member: frame for NODE_FRAME, crash for
	// NODE_CRASH. A NODE_ROOT node uses neither.
	enum node_type type;
	union {
		struct frame frame;
		struct crash crash;
	};
	// Children of this node: frame nodes further down the stack and the
	// crash nodes of traces that end here.
	struct node_array callers;
};

// Creates an empty tree root.
struct node *node_new_root(void);
// Merges the frames produced by t into the tree under root and, unless the
// iterator ends with an error, records a copy of *c at the end of that path.
void node_add_trace(struct node *root, const struct trace_iter *t, const struct crash *c);
// Appends to crashes every crash node found below the child of root whose
// frame equals *f.
void node_crashes(const struct node *root, const struct frame *f, struct node_array *crashes);

#endif

A  => test/args.txt +5 -0
@@ 1,5 @@
segv
fpe
fpe2
ill
abrt

A  => test/crasher.c +96 -0
@@ 1,96 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Element count of the array x. x must be a real array, not a pointer; the
// argument is parenthesized so any array-valued expression expands safely.
#define len(x) (sizeof(x) / sizeof((x)[0]))

// Never assigned, so it stays a null pointer; segfault() dereferences it.
static char *s;
// Zero divisor used by sigfpe().
static const int zero;

// Recurses n levels deep, then calls a slightly shifted copy of its own
// address. NOTE(review): presumably this lands in the middle of an
// instruction so that the CPU raises SIGILL; the outcome depends on the
// platform.
static void
sigill(int n)
{
	if (!n) {
		// Overlay the function pointer with an int so the address can be
		// nudged through the int member.
		union {
			int n;
			void (*fn)(int n);
		} u = {.fn = sigill};

		u.n += 2;
		u.fn(0); // achievement unlocked!
	} else {
		sigill(n-1);
	}
}

// Recurses n levels deep, then reads through the null pointer s.
static void
segfault(int n)
{
	if (!n) {
		printf("%c\n", *s);
	} else {
		segfault(n-1);
	}
}


// Recurses n levels deep, then divides an integer by zero.
static void
sigfpe(int n)
{
	if (!n) {
		printf("%d\n", 1/zero);
	} else {
		sigfpe(n-1);
	}
}

// Reaches the same division by zero as sigfpe, but through an extra caller
// and at a fixed depth of 1; n is ignored.
static void
sigfpe2(int n)
{
	sigfpe(1);
	(void)n;
}

// Recurses n levels deep, then calls abort().
static void
sigabrt(int n)
{
	if (!n) {
		abort();
	} else {
		sigabrt(n-1);
	}
}

// Usage: crasher TYPE [DEPTH]
//
// Runs the crash routine registered under TYPE, passing DEPTH (0 when not
// given). Returns 1 when TYPE is missing and 2 when no routine ended the
// process.
int
main(int argc, const char *argv[])
{
	if (argc < 2) {
		return 1;
	}

	const struct {
		const char *type;
		void (*doit)(int n);
	} table[] = {
		{"segv", segfault},
		{"fpe", sigfpe},
		{"fpe2", sigfpe2},
		{"ill", sigill},
		{"abrt", sigabrt},
	};

	const int depth = argv[2] ? atoi(argv[2]) : 0;

	for (size_t i = 0; i < len(table); i++) {
		if (strcmp(table[i].type, argv[1]) != 0) {
			continue;
		}

		table[i].doit(depth);
	}

	return 2;
}

A  => trace_iter.h +14 -0
@@ 1,14 @@
#ifndef TRACE_ITER_H
#define TRACE_ITER_H
#include "frame.h"
// Result of asking a trace iterator for its next frame.
enum trace_msg {
	TRACE_DONE = 0,     // the walk is over
	TRACE_CONTINUE = 1, // a frame was delivered and the walk goes on
	TRACE_ERR = 2,      // the walk failed
};

// Source of stack frames: a callback together with its context.
struct trace_iter {
	// Opaque context handed to next on every call.
	void *data;
	// Fills *f with a frame and reports whether the walk continues, is
	// finished, or has failed.
	enum trace_msg (*next)(void *data, struct frame *f);
};
#endif

A  => tracer.c +164 -0
@@ 1,164 @@
#include "tracer.h"
#include "node.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

// How waitcrash classifies what happened to the traced child.
enum wait_event {
	WAIT_CRASH = 0,  // stopped on one of the crash signals
	WAIT_BORING = 1, // something else that can be waited past
	WAIT_WTF = 2,    // unexpected and unrecoverable
};

/*
 * Wait for the next state change of the traced child and classify it.
 *
 * Returns WAIT_CRASH, with the signal number stored in *signal, when the
 * child stopped on SIGSEGV, SIGFPE, SIGILL or SIGABRT. Returns WAIT_WTF when
 * waitpid fails, when the child has terminated, or when it cannot be
 * resumed. Any other stop is resumed and reported as WAIT_BORING, so the
 * caller can simply wait again.
 */
enum wait_event
waitcrash(pid_t child, int *signal)
{
	int wstatus = 0;
	errno = 0;
	pid_t got = waitpid(child, &wstatus, 0);

	if (got == -1 || WIFEXITED(wstatus) || WIFSIGNALED(wstatus)) {
		printf("DEBUG (%s:%d) waitpid %s\n", __FILE__, __LINE__, strerror(errno));
		return WAIT_WTF;
	}

	if (!WIFSTOPPED(wstatus)) {
		return WAIT_BORING;
	}

	int sig = WSTOPSIG(wstatus);

	switch (sig) {
	case SIGSEGV:
	case SIGFPE:
	case SIGILL:
	case SIGABRT:
		*signal = sig;
		return WAIT_CRASH;
	default:
		break;
	}

	// A tracee stays stopped until its tracer resumes it; without this the
	// caller's next wait would block forever. The signal that caused the
	// stop is handed on to the child, except SIGTRAP, which comes from the
	// tracing itself.
	int deliver = sig == SIGTRAP ? 0 : sig;

	if (ptrace(PTRACE_CONT, child, NULL, (void *)(intptr_t)deliver) == -1) {
		printf("DEBUG (%s:%d) PTRACE_CONT %s\n", __FILE__, __LINE__, strerror(errno));
		return WAIT_WTF;
	}

	return WAIT_BORING;
}

/*
 * Body of the forked child: ask to be traced by the parent, then replace the
 * process image with argv[0], passing argv as its arguments.
 *
 * Never returns. When tracing or the exec fails, the child reports the error
 * and exits with status 127. Exiting, rather than aborting, keeps the
 * promise of _Noreturn even when assertions are compiled out and does not
 * leave a signal-stopped child behind for the tracer.
 */
_Noreturn void
childhood(const char **argv)
{
	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) {
		printf("DEBUG (%s:%d) PTRACE_TRACEME %s\n", __FILE__, __LINE__,
			strerror(errno));
		_exit(127);
	}

	execv(argv[0], (char *const *)argv);

	// execv only comes back when it failed, and errno then says why.
	printf("DEBUG (%s:%d) execve %s\n", __FILE__, __LINE__, strerror(errno));
	_exit(127);
}

enum tracer_status
tracer_init(struct tracer *t, const char **argv)
{
	t->argv = argv;
	t->as = unw_create_addr_space(&_UPT_accessors, 0);

	if (!t->as) {
		return TRACER_ERR;
	}

	t->child = fork();

	if (t->child == -1) {
		printf("DEBUG (%s:%d) fork %s\n", __FILE__, __LINE__, strerror(errno));
		return TRACER_ERR;
	}

	if (!t->child) {
		childhood(argv);
	}

	int wstatus = 0;
	errno = 0;
	waitpid(t->child, &wstatus, 0);

	if (errno || !WIFSTOPPED(wstatus) || WSTOPSIG(wstatus) != SIGTRAP) {
		printf("DEBUG (%s:%d) waitpid %s\n", __FILE__, __LINE__, strerror(errno));
		return TRACER_ERR;
	}

	errno = 0;
	ptrace(PTRACE_CONT, t->child, SIGCONT, NULL);

	if (errno) {
		printf("DEBUG (%s:%d) PTRACE_CONT %s\n", __FILE__, __LINE__, strerror(errno));
		return TRACER_ERR;
	}

wait:
	switch (waitcrash(t->child, &t->signal)) {
	case WAIT_BORING:
		goto wait;
	case WAIT_CRASH:
		break;
	case WAIT_WTF:
		return TRACER_ERR;
	}

	t->ctx = _UPT_create(t->child); // 1

	if (!t->ctx) {
		return TRACER_ERR;
	}

	int err = unw_init_remote(&t->cur, t->as, t->ctx);  // 2

	if (err < 0) {
		printf("DEBUG (%s:%d) unw_init_remote %s\n", __FILE__, __LINE__, unw_strerror(-err));
		return TRACER_ERR;
	}

	return TRACER_OK;
}

enum trace_msg
tracer_step(void *data, struct frame *f)
{
	struct tracer *t = data;
	int err = unw_get_reg(&t->cur, UNW_REG_IP, &f->ip); // 3

	if (err < 0) {
		printf("DEBUG (%s:%d) unw_get_reg %s\n", __FILE__, __LINE__, unw_strerror(-err));
		return TRACE_ERR;
	}

	unw_get_proc_name(&t->cur, f->symbol, sizeof(f->symbol), &f->offset); // 4
	err = unw_step(&t->cur); // 5

	if (err > 0) {
		return TRACE_CONTINUE;
	} else if (err == 0) {
		return TRACE_DONE;
	}

	printf("DEBUG (%s:%d) unw_step %s\n", __FILE__, __LINE__, unw_strerror(-err));
	return TRACE_ERR;
}

/*
 * End a tracing session that tracer_init started successfully: release the
 * unwinding context, detach from the child, make sure the child terminates
 * and collect its exit status, then release the address space.
 *
 * Collecting the child matters because every traced command would otherwise
 * stay behind as a zombie until this process exits.
 */
void
tracer_finish(struct tracer *t)
{
	_UPT_destroy(t->ctx); // 6

	if (ptrace(PTRACE_DETACH, t->child, NULL, NULL) == -1) { // 7
		printf("DEBUG (%s:%d) PTRACE_DETACH %s\n", __FILE__, __LINE__, strerror(errno));
	}

	// The child has not been waited for since it stopped, so its pid still
	// refers to it and is safe to signal.
	kill(t->child, SIGKILL);

	while (waitpid(t->child, NULL, 0) == -1 && errno == EINTR) {
		// interrupted by a signal; wait again
	}

	unw_destroy_addr_space(t->as);
}

A  => tracer.h +28 -0
@@ 1,28 @@
#ifndef TRACER_H
#define TRACER_H

#include "trace_iter.h"

#include <libunwind.h>
#include <libunwind-ptrace.h>
#include <sys/types.h>

// State of one tracing session: a child process and the libunwind objects
// used to walk its stack.
struct tracer {
	// Command executed in the child.
	const char **argv;
	// libunwind address space created with the ptrace accessors.
	unw_addr_space_t as;
	// Process id of the traced child.
	pid_t child;
	// Crash signal the child stopped on.
	int signal;
	// Handle returned by _UPT_create for the child.
	struct UPT_info *ctx;
	// Unwind cursor that tracer_step reads from and advances.
	unw_cursor_t cur;
};

// Outcome of tracer_init.
enum tracer_status {
	TRACER_OK = 0,  // the tracer is ready for tracer_step
	TRACER_ERR = 1, // setting up the tracer failed
};

// Runs argv in a traced child, waits until it stops on a crash signal and
// prepares t for unwinding. Returns TRACER_OK on success, else TRACER_ERR.
enum tracer_status tracer_init(struct tracer *t, const char **argv);
// Iterator callback (data is a struct tracer): stores the current frame in
// *f and advances to the next one.
enum trace_msg tracer_step(void *data, struct frame *f);
// Releases the unwinding state of t and detaches from the child.
void tracer_finish(struct tracer *t);

#endif

A  => uc.c +66 -0
@@ 1,66 @@
#include <assert.h>
#include <stdlib.h>

#include "line.h"
#include "node.h"
#include "tracer.h"

/*
 * Build the argument vector { "./test/crasher", arg1, NULL }.
 *
 * The vector is allocated with malloc and owned by the caller; arg1 is
 * stored as given, not copied. Never returns NULL: running out of memory is
 * reported and aborts the process.
 */
const char **
argv_new(const char *arg1)
{
	const char **argv = malloc(3 * sizeof *argv);

	if (!argv) {
		perror("malloc");
		abort();
	}

	argv[0] = "./test/crasher";
	argv[1] = arg1;
	argv[2] = NULL;
	return argv;
}

/*
 * Read one crasher argument per line from stdin, trace each run into a
 * shared tree, then print every distinct top-of-stack frame followed by the
 * commands that crashed there.
 */
int
main(void)
{
	struct node *root = node_new_root();
	char *arg = NULL;

	while (line_next(stdin, &arg)) {
		struct tracer t;
		const char **argv = argv_new(arg);

		if (tracer_init(&t, argv) != TRACER_OK) {
			// Nothing was recorded for this command, so nothing refers to
			// its argument vector or its line.
			free(argv);
			free(arg);
			arg = NULL;
			continue;
		}

		// argv and arg are kept alive from here on: a crash node added to
		// the tree refers to them.
		struct trace_iter i = {.data = &t, .next = tracer_step};
		struct crash c = {.argv = argv, .signal = t.signal};
		node_add_trace(root, &i, &c);
		tracer_finish(&t);
	}

	// The failed read that ended the loop may have left a buffer behind.
	free(arg);

	for (size_t i = 0; i < root->callers.len; i++) {
		struct node *n = root->callers.nodes[i];
		assert(n->type == NODE_FRAME);
		struct node_array crashes = {0};
		node_crashes(root, &n->frame, &crashes);

		if (i) {
			printf("\n");
		}

		// uintptr_t need not be unsigned long; convert to match %lx.
		printf("ip=%lx offset=%lx %s\n", (unsigned long)n->frame.ip,
			(unsigned long)n->frame.offset, n->frame.symbol);

		for (size_t j = 0; j < crashes.len; j++) {
			struct node *m = crashes.nodes[j];
			printf("\t");
			for (size_t k = 0; m->crash.argv[k]; k++) {
				if (k) {
					printf(" ");
				}

				printf("%s", m->crash.argv[k]);
			}
			printf("\n");
		}

		// Only the array of pointers is released; the nodes belong to the
		// tree.
		free(crashes.nodes);
	}

	return 0;
}