~brenns10/sc-regex

0f51e45979952c3071c0ace88129e0f8671fb1aa — Stephen Brennan 6 months ago 343fb27
Run pre-commit on all files
6 files changed, 270 insertions(+), 274 deletions(-)

M src/codegen.c
M src/instr.c
M src/lex.c
M src/parse.c
M src/pike.c
M src/sc-regex-private.h
M src/codegen.c => src/codegen.c +69 -67
@@ 17,10 17,10 @@
 * efficiently to locations in the final array using a table.
 */

#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "sc-regex-private.h"


@@ 63,9 63,9 @@ static void join(struct sc_regex_frag *a, struct sc_regex_frag *b)
	 * will be deleted later.
	 */
	if (l->in.code == SC_REGEX_CODE_MATCH) {
		lastid = (struct sc_regex_instr*) l->id;
		lastid = (struct sc_regex_instr *)l->id;
	} else {
		lastid = (struct sc_regex_instr*) -1;
		lastid = (struct sc_regex_instr *)-1;
	}

	while (a->next != NULL) {


@@ 74,20 74,19 @@ static void join(struct sc_regex_frag *a, struct sc_regex_frag *b)
		 */
		if (a->in.code == SC_REGEX_CODE_MATCH) {
			a->in.code = SC_REGEX_CODE_JUMP;
			a->in.x = (struct sc_regex_instr*)b->id;
			a->in.x = (struct sc_regex_instr *)b->id;
		}
		/*
		 * Jumps to the final match instruction should instead be
		 * targeted to the next fragment.
		 */
		if ((a->in.code == SC_REGEX_CODE_JUMP
				|| a->in.code == SC_REGEX_CODE_SPLIT
			) && a->in.x == lastid
		) {
			a->in.x = (struct sc_regex_instr*) b->id;
		if ((a->in.code == SC_REGEX_CODE_JUMP ||
		     a->in.code == SC_REGEX_CODE_SPLIT) &&
		    a->in.x == lastid) {
			a->in.x = (struct sc_regex_instr *)b->id;
		}
		if (a->in.code == SC_REGEX_CODE_SPLIT && a->in.y == lastid) {
			a->in.y = (struct sc_regex_instr*) b->id;
			a->in.y = (struct sc_regex_instr *)b->id;
		}
		prev = a;
		a = a->next;


@@ 110,8 109,8 @@ static size_t fraglen(struct sc_regex_frag *f)
	return len;
}

static struct sc_regex_frag *newfrag(
	enum sc_regex_code code, struct sc_regex_state *s)
static struct sc_regex_frag *newfrag(enum sc_regex_code code,
                                     struct sc_regex_state *s)
{
	struct sc_regex_frag *new = calloc(1, sizeof(struct sc_regex_frag));
	new->in.code = code;


@@ 129,16 128,16 @@ static void freefraglist(struct sc_regex_frag *f)
	}
}

static struct sc_regex_frag *regex(
	struct sc_regex_ptree *t, struct sc_regex_state *s);
static struct sc_regex_frag *term(
	struct sc_regex_ptree *t, struct sc_regex_state *s);
static struct sc_regex_frag *expr(
	struct sc_regex_ptree *t, struct sc_regex_state *s);
static struct sc_regex_frag *class(
	struct sc_regex_ptree *t, struct sc_regex_state *s, bool is_negative);
static struct sc_regex_frag *sub(
	struct sc_regex_ptree *t, struct sc_regex_state *s);
static struct sc_regex_frag *regex(struct sc_regex_ptree *t,
                                   struct sc_regex_state *s);
static struct sc_regex_frag *term(struct sc_regex_ptree *t,
                                  struct sc_regex_state *s);
static struct sc_regex_frag *expr(struct sc_regex_ptree *t,
                                  struct sc_regex_state *s);
static struct sc_regex_frag *class(struct sc_regex_ptree *t,
                                   struct sc_regex_state *s, bool is_negative);
static struct sc_regex_frag *sub(struct sc_regex_ptree *t,
                                 struct sc_regex_state *s);

static struct sc_regex_frag *special(char type, struct sc_regex_state *s)
{


@@ 152,7 151,7 @@ static struct sc_regex_frag *special(char type, struct sc_regex_state *s)
	case 's':
	case 'S':
		f = (type == 's') ? newfrag(SC_REGEX_CODE_RANGE, s)
			: newfrag(SC_REGEX_CODE_RANGE, s);
		                  : newfrag(SC_REGEX_CODE_RANGE, s);
		f->in.s = nelem(whitespace) / 2;
		f->in.x = calloc(nelem(whitespace), sizeof(char));
		memcpy(f->in.x, whitespace, nelem(whitespace));


@@ 160,7 159,7 @@ static struct sc_regex_frag *special(char type, struct sc_regex_state *s)
	case 'w':
	case 'W':
		f = (type == 'w') ? newfrag(SC_REGEX_CODE_RANGE, s)
			: newfrag(SC_REGEX_CODE_RANGE, s);
		                  : newfrag(SC_REGEX_CODE_RANGE, s);
		f->in.s = nelem(word) / 2;
		f->in.x = calloc(nelem(word), sizeof(char));
		memcpy(f->in.x, word, nelem(word));


@@ 168,13 167,15 @@ static struct sc_regex_frag *special(char type, struct sc_regex_state *s)
	case 'd':
	case 'D':
		f = (type == 'd') ? newfrag(SC_REGEX_CODE_RANGE, s)
			: newfrag(SC_REGEX_CODE_RANGE, s);
		                  : newfrag(SC_REGEX_CODE_RANGE, s);
		f->in.s = nelem(number) / 2;
		f->in.x = calloc(nelem(number), sizeof(char));
		memcpy(f->in.x, number, nelem(number));
		break;
	default:
		fprintf(stderr, "not implemented: special character class '%c'\n", type);
		fprintf(stderr,
		        "not implemented: special character class '%c'\n",
		        type);
		exit(EXIT_FAILURE);
		break;
	}


@@ 183,18 184,17 @@ static struct sc_regex_frag *special(char type, struct sc_regex_state *s)
	return f;
}

static struct sc_regex_frag *term(
	struct sc_regex_ptree *t, struct sc_regex_state *s)
static struct sc_regex_frag *term(struct sc_regex_ptree *t,
                                  struct sc_regex_state *s)
{
	struct sc_regex_frag *f = NULL;

	assert(t->nt == SC_REGEX_NTSYM_TERM);

	if (t->production == 1) {
		if (t->children[0]->tok.sym == SC_REGEX_TSYM_CHAR
			|| t->children[0]->tok.sym == SC_REGEX_TSYM_CARET
			|| t->children[0]->tok.sym == SC_REGEX_TSYM_MINUS
		) {
		if (t->children[0]->tok.sym == SC_REGEX_TSYM_CHAR ||
		    t->children[0]->tok.sym == SC_REGEX_TSYM_CARET ||
		    t->children[0]->tok.sym == SC_REGEX_TSYM_MINUS) {
			// Character
			f = newfrag(SC_REGEX_CODE_CHAR, s);
			f->in.c = t->children[0]->tok.c;


@@ 224,7 224,8 @@ static struct sc_regex_frag *term(
	return f;
}

static struct sc_regex_frag *expr(struct sc_regex_ptree *t, struct sc_regex_state *s)
static struct sc_regex_frag *expr(struct sc_regex_ptree *t,
                                  struct sc_regex_state *s)
{
	struct sc_regex_frag *f = NULL, *a = NULL, *b = NULL, *c = NULL;



@@ 250,14 251,14 @@ static struct sc_regex_frag *expr(struct sc_regex_ptree *t, struct sc_regex_stat
			c = newfrag(SC_REGEX_CODE_MATCH, s);
			if (t->nchildren == 3) {
				/* Non-greedy */
				a->in.x = (struct sc_regex_instr*) c->id;
				a->in.y = (struct sc_regex_instr*) f->id;
				a->in.x = (struct sc_regex_instr *)c->id;
				a->in.y = (struct sc_regex_instr *)f->id;
			} else {
				/* Greedy */
				a->in.x = (struct sc_regex_instr*) f->id;
				a->in.y = (struct sc_regex_instr*) c->id;
				a->in.x = (struct sc_regex_instr *)f->id;
				a->in.y = (struct sc_regex_instr *)c->id;
			}
			b->in.x = (struct sc_regex_instr*) a->id;
			b->in.x = (struct sc_regex_instr *)a->id;
			a->next = f;
			b->next = c;
			join(a, b);


@@ 275,12 276,12 @@ static struct sc_regex_frag *expr(struct sc_regex_ptree *t, struct sc_regex_stat
			b = newfrag(SC_REGEX_CODE_MATCH, s);
			if (t->nchildren == 3) {
				/* Non-greedy */
				a->in.x = (struct sc_regex_instr*) b->id;
				a->in.y = (struct sc_regex_instr*) f->id;
				a->in.x = (struct sc_regex_instr *)b->id;
				a->in.y = (struct sc_regex_instr *)f->id;
			} else {
				/* Greedy */
				a->in.x = (struct sc_regex_instr*) f->id;
				a->in.y = (struct sc_regex_instr*) b->id;
				a->in.x = (struct sc_regex_instr *)f->id;
				a->in.y = (struct sc_regex_instr *)b->id;
			}
			join(f, a);
			a->next = b;


@@ 298,12 299,12 @@ static struct sc_regex_frag *expr(struct sc_regex_ptree *t, struct sc_regex_stat
			b = newfrag(SC_REGEX_CODE_MATCH, s);
			if (t->nchildren == 3) {
				/* Non-greedy */
				a->in.x = (struct sc_regex_instr*) b->id;
				a->in.y = (struct sc_regex_instr*) f->id;
				a->in.x = (struct sc_regex_instr *)b->id;
				a->in.y = (struct sc_regex_instr *)f->id;
			} else {
				/* Greedy */
				a->in.x = (struct sc_regex_instr*) f->id;
				a->in.y = (struct sc_regex_instr*) b->id;
				a->in.x = (struct sc_regex_instr *)f->id;
				a->in.y = (struct sc_regex_instr *)b->id;
			}
			a->next = f;
			join(f, b);


@@ 314,7 315,8 @@ static struct sc_regex_frag *expr(struct sc_regex_ptree *t, struct sc_regex_stat
	}
}

static struct sc_regex_frag *sub(struct sc_regex_ptree *tree, struct sc_regex_state *state)
static struct sc_regex_frag *sub(struct sc_regex_ptree *tree,
                                 struct sc_regex_state *state)
{
	assert(tree->nt == SC_REGEX_NTSYM_SUB);
	struct sc_regex_frag *e = expr(tree->children[0], state);


@@ 329,7 331,8 @@ static struct sc_regex_frag *sub(struct sc_regex_ptree *tree, struct sc_regex_st
	return e;
}

static struct sc_regex_frag *regex(struct sc_regex_ptree *tree, struct sc_regex_state *state)
static struct sc_regex_frag *regex(struct sc_regex_ptree *tree,
                                   struct sc_regex_state *state)
{
	assert(tree->nt == SC_REGEX_NTSYM_REGEX);
	struct sc_regex_frag *s = sub(tree->children[0], state);


@@ 348,13 351,13 @@ static struct sc_regex_frag *regex(struct sc_regex_ptree *tree, struct sc_regex_
		struct sc_regex_frag *r = regex(tree->children[2], state);

		struct sc_regex_frag *pre = newfrag(SC_REGEX_CODE_SPLIT, state);
		pre->in.x = (struct sc_regex_instr*) s->id;
		pre->in.y = (struct sc_regex_instr*) r->id;
		pre->in.x = (struct sc_regex_instr *)s->id;
		pre->in.y = (struct sc_regex_instr *)r->id;
		pre->next = s;

		struct sc_regex_frag *m = newfrag(SC_REGEX_CODE_MATCH, state);
		struct sc_regex_frag *j = newfrag(SC_REGEX_CODE_JUMP, state);
		j->in.x = (struct sc_regex_instr*) m->id;
		j->in.x = (struct sc_regex_instr *)m->id;
		j->next = r;
		join(j, m);
		join(pre, j);


@@ 364,16 367,16 @@ static struct sc_regex_frag *regex(struct sc_regex_ptree *tree, struct sc_regex_
	return s;
}

static struct sc_regex_frag *class(
		struct sc_regex_ptree *tree, struct sc_regex_state *state, bool is_negative)
static struct sc_regex_frag *class(struct sc_regex_ptree *tree,
                                   struct sc_regex_state *state,
                                   bool is_negative)
{
	size_t nranges = 0;
	struct sc_regex_ptree *curr;
	struct sc_regex_frag *f;

	for (curr = tree; curr->nt == SC_REGEX_NTSYM_CLASS;
		curr = curr->children[curr->nchildren-1]
	) {
	     curr = curr->children[curr->nchildren - 1]) {
		nranges++;
	}



@@ 384,22 387,22 @@ static struct sc_regex_frag *class(
	}

	f->in.s = nranges;
	f->in.x = calloc(nranges*2, sizeof(char));
	char *block = (char*)f->in.x;
	f->in.x = calloc(nranges * 2, sizeof(char));
	char *block = (char *)f->in.x;

	curr = tree;
	nranges = 0;
	while (curr->nt == SC_REGEX_NTSYM_CLASS) {
		if (curr->production == 1 || curr->production == 2) {
			/* Range */
			block[2*nranges] = curr->children[0]->tok.c;
			block[2*nranges+1] = curr->children[1]->tok.c;
			block[2 * nranges] = curr->children[0]->tok.c;
			block[2 * nranges + 1] = curr->children[1]->tok.c;
		} else {
			/* Single */
			block[2*nranges] = curr->children[0]->tok.c;
			block[2*nranges+1] = curr->children[0]->tok.c;
			block[2 * nranges] = curr->children[0]->tok.c;
			block[2 * nranges + 1] = curr->children[0]->tok.c;
		}
		curr = curr->children[curr->nchildren-1];
		curr = curr->children[curr->nchildren - 1];
		nranges++;
	}



@@ 410,7 413,7 @@ static struct sc_regex_frag *class(
struct sc_regex *sc_regex_codegen(struct sc_regex_ptree *tree)
{
	/* Generate code. */
	struct sc_regex_state s = {0, 0};
	struct sc_regex_state s = { 0, 0 };
	struct sc_regex_frag *f = regex(tree, &s);
	size_t n;



@@ 440,9 443,8 @@ struct sc_regex *sc_regex_codegen(struct sc_regex_ptree *tree)
	 */
	for (curr = f, i = 0; curr; curr = curr->next, i++) {
		code[i] = curr->in;
		if (code[i].code == SC_REGEX_CODE_JUMP
			|| code[i].code == SC_REGEX_CODE_SPLIT
		) {
		if (code[i].code == SC_REGEX_CODE_JUMP ||
		    code[i].code == SC_REGEX_CODE_SPLIT) {
			code[i].x = code + targets[(intptr_t)code[i].x];
		}
		if (code[i].code == SC_REGEX_CODE_SPLIT) {

M src/instr.c => src/instr.c +78 -85
@@ 6,11 6,11 @@
 * take a compiled regex and dump it to the assembly representation.
 */

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "sc-regex-private.h"


@@ 21,14 21,11 @@
 * Declarations for parsing.
 */

enum linetype {
	Blank, Label, Code
};
enum linetype { Blank, Label, Code };
typedef enum linetype linetype;

static char *sc_regex_opcodes[] = {
	"char", "match", "jump", "split", "save", "any", "range", "nrange"
};
static char *sc_regex_opcodes[] = { "char", "match", "jump",  "split",
	                            "save", "any",   "range", "nrange" };

/*
 * Utilities for input/output


@@ 36,7 33,7 @@ static char *sc_regex_opcodes[] = {

char *sc_regex_char_to_string(char c)
{
	#define CTS_BUFSIZE 5
#define CTS_BUFSIZE 5
	static char buffer[CTS_BUFSIZE];
	if (c == ' ') {
		buffer[0] = '\\';


@@ 53,7 50,7 @@ char *sc_regex_char_to_string(char c)
			buffer[1] = 'n';
			break;
		case '\r':
			buffer[1]= 'r';
			buffer[1] = 'r';
			break;
		case '\t':
			buffer[1] = 't';


@@ 73,7 70,8 @@ char *sc_regex_char_to_string(char c)
	return buffer;
}

static char string_to_char(char *s) {
static char string_to_char(char *s)
{
	unsigned int c;
	if (s[0] == '\\') {
		switch (s[1]) {


@@ 98,7 96,6 @@ static char string_to_char(char *s) {
	}
}


/**
 * @brief Remove leading and trailing whitespace and comments from a line.
 */


@@ 136,9 133,9 @@ static char *trim(char *line, char *lastchar_out)

static char **tokenize(char *line, size_t *ntok)
{
	#define SEP " \n\t\v\f"
#define SEP " \n\t\v\f"
	size_t alloc = 16;
	char **buf = calloc(alloc, sizeof(char*));
	char **buf = calloc(alloc, sizeof(char *));

	buf[0] = strtok(line, SEP);
	*ntok = 0;


@@ 147,7 144,7 @@ static char **tokenize(char *line, size_t *ntok)

		if (*ntok >= alloc) {
			alloc *= 2;
			buf = realloc(buf, alloc * sizeof(char*));
			buf = realloc(buf, alloc * sizeof(char *));
		}

		buf[*ntok] = strtok(NULL, SEP);


@@ 173,86 170,85 @@ static struct sc_regex_instr read_instr(char *line, int lineno)
	size_t ntok;
	char **tokens = tokenize(line, &ntok);
	struct sc_regex_instr inst = {
		.code=0,
		.c=0,
		.s=0,
		.x=NULL,
		.y=NULL,
		.lastidx=0
		.code = 0, .c = 0, .s = 0, .x = NULL, .y = NULL, .lastidx = 0
	};

	if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_CHAR]) == 0) {
		if (ntok != 2) {
			fprintf(stderr, "line %d: require 2 tokens for char\n",
				lineno);
			        lineno);
			exit(1);
		}
		inst.code = SC_REGEX_CODE_CHAR;
		inst.c = tokens[1][0];
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_MATCH]) == 0) {
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_MATCH]) ==
	           0) {
		if (ntok != 1) {
			fprintf(stderr, "line %d: require 1 token for match\n",
				lineno);
			        lineno);
			exit(1);
		}
		inst.code = SC_REGEX_CODE_MATCH;
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_JUMP]) == 0) {
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_JUMP]) ==
	           0) {
		if (ntok != 2) {
			fprintf(stderr, "line %d: require 2 tokens for jump\n",
				lineno);
			        lineno);
			exit(1);
		}
		inst.code = SC_REGEX_CODE_JUMP;
		inst.x = (struct sc_regex_instr*)tokens[1];
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_SPLIT]) == 0) {
		inst.x = (struct sc_regex_instr *)tokens[1];
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_SPLIT]) ==
	           0) {
		if (ntok != 3) {
			fprintf(stderr, "line %d: require 3 tokens for split\n",
				lineno);
			        lineno);
			exit(1);
		}
		inst.code = SC_REGEX_CODE_SPLIT;
		inst.x = (struct sc_regex_instr*)tokens[1];
		inst.y = (struct sc_regex_instr*)tokens[2];
	} else if (strcmp(
			tokens[0], sc_regex_opcodes[SC_REGEX_CODE_SAVE]) == 0
	) {
		inst.x = (struct sc_regex_instr *)tokens[1];
		inst.y = (struct sc_regex_instr *)tokens[2];
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_SAVE]) ==
	           0) {
		if (ntok != 2) {
			fprintf(stderr, "line %d: require 2 tokens for save\n",
				lineno);
			        lineno);
			exit(1);
		}
		inst.code = SC_REGEX_CODE_SAVE;
		sscanf(tokens[1], "%zu", &inst.s);
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_ANY]) == 0
	) {
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_ANY]) ==
	           0) {
		if (ntok != 1) {
			fprintf(stderr, "line %d: require 1 token for any\n",
				lineno);
			        lineno);
			exit(1);
		}
		inst.code = SC_REGEX_CODE_ANY;
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_RANGE]) == 0
		|| strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_NRANGE]) == 0
	) {
	} else if (strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_RANGE]) ==
	                   0 ||
	           strcmp(tokens[0], sc_regex_opcodes[SC_REGEX_CODE_NRANGE]) ==
	                   0) {
		if (ntok % 2 == 0) {
			fprintf(
				stderr, "line %d, require even number of "
				"character tokens\n", lineno
			);
			fprintf(stderr,
			        "line %d, require even number of "
			        "character tokens\n",
			        lineno);
			exit(1);
		}
		inst.code = (strcmp(tokens[0],
				sc_regex_opcodes[SC_REGEX_CODE_RANGE]) == 0) ?
			SC_REGEX_CODE_RANGE : SC_REGEX_CODE_NRANGE;
		inst.s = (size_t) (ntok - 1) / 2;
		                    sc_regex_opcodes[SC_REGEX_CODE_RANGE]) == 0)
		                    ? SC_REGEX_CODE_RANGE
		                    : SC_REGEX_CODE_NRANGE;
		inst.s = (size_t)(ntok - 1) / 2;
		char *block = calloc(ntok - 1, sizeof(char));
		inst.x = (struct sc_regex_instr*)block;
		inst.x = (struct sc_regex_instr *)block;
		for (size_t i = 0; i < ntok - 1; i++) {
			block[i] = string_to_char(tokens[i+1]);
			block[i] = string_to_char(tokens[i + 1]);
		}
	} else {
		fprintf(stderr, "line %d: unknown opcode \"%s\"\n", lineno,
			tokens[0]);
		        tokens[0]);
	}
	return inst;
}


@@ 265,10 261,9 @@ static struct sc_regex_instr read_instr(char *line, int lineno)
 * @param label Label to search for
 * @param line Line number (for error messages)
 */
static size_t gettarget(
	char **labels, size_t *labelindices, size_t nlabels,
	char *label, size_t line
) {
static size_t gettarget(char **labels, size_t *labelindices, size_t nlabels,
                        char *label, size_t line)
{
	for (size_t i = 0; i < nlabels; i++) {
		if (strcmp(labels[i], label) == 0) {
			return labelindices[i];


@@ 296,7 291,7 @@ struct sc_regex *sc_regex_read(char *str)
	}

	/* Create an array of lines. */
	char **lines = calloc(nlines, sizeof(char*));
	char **lines = calloc(nlines, sizeof(char *));
	size_t j = 0;
	lines[j++] = str;
	for (size_t i = 0; str[i]; i++) {


@@ 333,7 328,7 @@ struct sc_regex *sc_regex_read(char *str)
		if (types[i] == Label) {
			size_t len = strlen(lines[i]);
			/* remove the colon, we don't want it */
			lines[i][len-1] = '\0';
			lines[i][len - 1] = '\0';
			labels[labelidx] = lines[i];
			labelindices[labelidx] = codeidx + 1;
			labelidx++;


@@ 353,20 348,19 @@ struct sc_regex *sc_regex_read(char *str)
			continue;
		}

		rv[codeidx] = read_instr(lines[i], i+1);
		rv[codeidx] = read_instr(lines[i], i + 1);

		/* lookup labels and point them correctly */
		if (rv[codeidx].code == SC_REGEX_CODE_JUMP
			|| rv[codeidx].code == SC_REGEX_CODE_SPLIT
		) {
			rv[codeidx].x = rv + gettarget(
				labels, labelindices, nlabels,
				(char*)rv[codeidx].x, i+1);
		if (rv[codeidx].code == SC_REGEX_CODE_JUMP ||
		    rv[codeidx].code == SC_REGEX_CODE_SPLIT) {
			rv[codeidx].x =
			        rv + gettarget(labels, labelindices, nlabels,
			                       (char *)rv[codeidx].x, i + 1);
		}
		if (rv[codeidx].code == SC_REGEX_CODE_SPLIT) {
			rv[codeidx].y = rv + gettarget(
				labels, labelindices, nlabels,
				(char*)rv[codeidx].y, i+1);
			rv[codeidx].y =
			        rv + gettarget(labels, labelindices, nlabels,
			                       (char *)rv[codeidx].y, i + 1);
		}

		codeidx++;


@@ 382,7 376,8 @@ struct sc_regex *sc_regex_read(char *str)
/**
 * @brief Read a program from a file.
 */
struct sc_regex *sc_regex_fread(FILE *f) {
struct sc_regex *sc_regex_fread(FILE *f)
{
	size_t alloc = 4096;
	char *buf = malloc(alloc);
	size_t start = 0;


@@ 410,9 405,8 @@ void sc_regex_write(struct sc_regex *r, FILE *f)

	/* Find every instruction that needs a label. */
	for (size_t i = 0; i < r->n; i++) {
		if (r->i[i].code == SC_REGEX_CODE_JUMP
			|| r->i[i].code == SC_REGEX_CODE_SPLIT
		) {
		if (r->i[i].code == SC_REGEX_CODE_JUMP ||
		    r->i[i].code == SC_REGEX_CODE_SPLIT) {
			labels[r->i[i].x - r->i] = 1;
		}
		if (r->i[i].code == SC_REGEX_CODE_SPLIT) {


@@ 431,11 425,11 @@ void sc_regex_write(struct sc_regex *r, FILE *f)
		if (labels[i] > 0) {
			fprintf(f, "L%zu:\n", labels[i]);
		}
		char *block = (char*) r->i[i].x;
		char *block = (char *)r->i[i].x;
		switch (r->i[i].code) {
		case SC_REGEX_CODE_CHAR:
			fprintf(f, "    char %s\n",
				sc_regex_char_to_string(r->i[i].c));
			        sc_regex_char_to_string(r->i[i].c));
			break;
		case SC_REGEX_CODE_MATCH:
			fprintf(f, "    match\n");


@@ 445,8 439,8 @@ void sc_regex_write(struct sc_regex *r, FILE *f)
			break;
		case SC_REGEX_CODE_SPLIT:
			fprintf(f, "    split L%zu L%zu\n",
				labels[r->i[i].x - r->i],
				labels[r->i[i].y - r->i]);
			        labels[r->i[i].x - r->i],
			        labels[r->i[i].y - r->i]);
			break;
		case SC_REGEX_CODE_SAVE:
			fprintf(f, "    save %zu\n", r->i[i].s);


@@ 458,10 452,10 @@ void sc_regex_write(struct sc_regex *r, FILE *f)
			fprintf(f, "    nrange");
			for (size_t j = 0; j < r->i[i].s; j++) {
				fprintf(f, " %s",
					sc_regex_char_to_string(block[2*j]));
				        sc_regex_char_to_string(block[2 * j]));
				fprintf(f, " %s",
					sc_regex_char_to_string(
						block[2*j + 1]));
				        sc_regex_char_to_string(
				                block[2 * j + 1]));
			}
			fprintf(f, "\n");
			break;


@@ 469,10 463,10 @@ void sc_regex_write(struct sc_regex *r, FILE *f)
			fprintf(f, "    range");
			for (size_t j = 0; j < r->i[i].s; j++) {
				fprintf(f, " %s",
					sc_regex_char_to_string(block[2*j]));
				        sc_regex_char_to_string(block[2 * j]));
				fprintf(f, " %s",
					sc_regex_char_to_string(
						block[2*j + 1]));
				        sc_regex_char_to_string(
				                block[2 * j + 1]));
			}
			fprintf(f, "\n");
			break;


@@ 485,9 479,8 @@ void sc_regex_write(struct sc_regex *r, FILE *f)
void sc_regex_free(struct sc_regex *r)
{
	for (size_t i = 0; i < r->n; i++) {
		if (r->i[i].code == SC_REGEX_CODE_RANGE
			|| r->i[i].code == SC_REGEX_CODE_NRANGE
		) {
		if (r->i[i].code == SC_REGEX_CODE_RANGE ||
		    r->i[i].code == SC_REGEX_CODE_NRANGE) {
			free(r->i[i].x);
		}
	}

M src/lex.c => src/lex.c +37 -34
@@ 19,45 19,45 @@ void sc_regex_escape(struct sc_regex_lexer *l)
{
	switch (sc_regex_input_idx(l->input, l->index)) {
	case L'(':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'('};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'(' };
		break;
	case L')':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L')'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L')' };
		break;
	case L'[':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'['};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'[' };
		break;
	case L']':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L']'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L']' };
		break;
	case L'+':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'+'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'+' };
		break;
	case L'-':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'-'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'-' };
		break;
	case L'*':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'*'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'*' };
		break;
	case L'?':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'?'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'?' };
		break;
	case L'^':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'^'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'^' };
		break;
	case L'n':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'\n'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'\n' };
		break;
	case L'.':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'.'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'.' };
		break;
	case L'|':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CHAR, L'|'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR, L'|' };
		break;
	default:
		l->tok = (struct sc_regex_token){
			SC_REGEX_TSYM_SPECIAL,
			sc_regex_input_idx(l->input, l->index)};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_SPECIAL,
			                          sc_regex_input_idx(
			                                  l->input, l->index) };
		break;
	}
}


@@ 73,7 73,7 @@ struct sc_regex_token sc_regex_nextsym(struct sc_regex_lexer *l)
	if (l->nbuf > 0) {
		l->tok = l->buf[0];
		for (size_t i = 0; i < l->nbuf - 1; i++) {
			l->buf[i] = l->buf[i+1];
			l->buf[i] = l->buf[i + 1];
		}
		l->nbuf--;
		/* printf(";; nextsym(): unbuffering {%s, '%s'}\n",


@@ 85,49 85,52 @@ struct sc_regex_token sc_regex_nextsym(struct sc_regex_lexer *l)

	switch (sc_regex_input_idx(l->input, l->index)) {
	case L'(':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_LPAREN, L'('};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_LPAREN, L'(' };
		break;
	case L')':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_RPAREN, L')'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_RPAREN, L')' };
		break;
	case L'[':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_LBRACKET, L'['};
		l->tok =
		        (struct sc_regex_token){ SC_REGEX_TSYM_LBRACKET, L'[' };
		break;
	case L']':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_RBRACKET, L']'};
		l->tok =
		        (struct sc_regex_token){ SC_REGEX_TSYM_RBRACKET, L']' };
		break;
	case L'+':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_PLUS, L'+'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_PLUS, L'+' };
		break;
	case L'-':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_MINUS, L'-'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_MINUS, L'-' };
		break;
	case L'*':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_STAR, L'*'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_STAR, L'*' };
		break;
	case L'?':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_QUESTION, L'?'};
		l->tok =
		        (struct sc_regex_token){ SC_REGEX_TSYM_QUESTION, L'?' };
		break;
	case L'^':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_CARET, L'^'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CARET, L'^' };
		break;
	case L'|':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_PIPE, L'|'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_PIPE, L'|' };
		break;
	case L'.':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_DOT, L'.'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_DOT, L'.' };
		break;
	case L'\\':
		l->index++;
		sc_regex_escape(l);
		break;
	case L'\0':
		l->tok = (struct sc_regex_token){SC_REGEX_TSYM_EOF, L'\0'};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_EOF, L'\0' };
		break;
	default:
		l->tok = (struct sc_regex_token){
			SC_REGEX_TSYM_CHAR,
			sc_regex_input_idx(l->input, l->index)};
		l->tok = (struct sc_regex_token){ SC_REGEX_TSYM_CHAR,
			                          sc_regex_input_idx(
			                                  l->input, l->index) };
		break;
	}
	l->index++;


@@ 141,8 144,8 @@ struct sc_regex_token sc_regex_nextsym(struct sc_regex_lexer *l)
void sc_regex_unget(struct sc_regex_token t, struct sc_regex_lexer *l)
{
	if (l->nbuf >= SC_REGEX_LEXER_BUFSIZE) {
		fprintf(stderr,
			"error: maximum lexer buffer size exceeded, dumbass.\n");
		fprintf(stderr, "error: maximum lexer buffer size exceeded, "
		                "dumbass.\n");
		exit(1);
	}



@@ 152,7 155,7 @@ void sc_regex_unget(struct sc_regex_token t, struct sc_regex_lexer *l)
	 */

	for (int i = l->nbuf - 1; i >= 0; i--) {
		l->buf[i+1] = l->buf[i];
		l->buf[i + 1] = l->buf[i];
	}
	l->buf[0] = l->tok;
	l->tok = t;

M src/parse.c => src/parse.c +47 -58
@@ 1,21 1,19 @@
/*
 * parse.c: recursive descent regex parser
 */
#include <stdio.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

#include "sc-regex-private.h"

char *sc_regex_tsym_names[] = {
	"CharSym", "Special", "Eof", "LParen", "RParen", "LBracket", "RBracket",
	"Plus", "Minus", "Star", "Question", "Caret", "Pipe", "Dot"
};
char *sc_regex_tsym_names[] = { "CharSym", "Special",  "Eof",      "LParen",
	                        "RParen",  "LBracket", "RBracket", "Plus",
	                        "Minus",   "Star",     "Question", "Caret",
	                        "Pipe",    "Dot" };

char *sc_regex_ntsym_names[] = {
	"TERM", "EXPR", "REGEX", "CLASS", "SUB"
};
char *sc_regex_ntsym_names[] = { "TERM", "EXPR", "REGEX", "CLASS", "SUB" };

/*
 * Convenience functions for parse trees.


@@ 30,8 28,8 @@ static struct sc_regex_ptree *terminal_tree(struct sc_regex_token tok)
	return tree;
}

static struct sc_regex_ptree *nonterminal_tree(
		enum sc_regex_ntsym nt, size_t nchildren)
static struct sc_regex_ptree *nonterminal_tree(enum sc_regex_ntsym nt,
                                               size_t nchildren)
{
	struct sc_regex_ptree *tree = calloc(1, sizeof(struct sc_regex_ptree));
	tree->nchildren = nchildren;


@@ 68,7 66,7 @@ static void expect(enum sc_regex_tsym s, struct sc_regex_lexer *l)
		return;
	}
	fprintf(stderr, "error: expected %s, got %s\n", sc_regex_tsym_names[s],
		sc_regex_tsym_names[l->tok.sym]);
	        sc_regex_tsym_names[l->tok.sym]);
	exit(1);
}



@@ 81,20 79,17 @@ static struct sc_regex_ptree *CLASS(struct sc_regex_lexer *l);

static struct sc_regex_ptree *TERM(struct sc_regex_lexer *l)
{
	if (accept(SC_REGEX_TSYM_CHAR, l) 
		|| accept(SC_REGEX_TSYM_DOT, l)
		|| accept(SC_REGEX_TSYM_SPECIAL, l)
		|| accept(SC_REGEX_TSYM_CARET, l)
		|| accept(SC_REGEX_TSYM_MINUS, l)
	) {
		struct sc_regex_ptree *result = nonterminal_tree(
			SC_REGEX_NTSYM_TERM, 1);
	if (accept(SC_REGEX_TSYM_CHAR, l) || accept(SC_REGEX_TSYM_DOT, l) ||
	    accept(SC_REGEX_TSYM_SPECIAL, l) ||
	    accept(SC_REGEX_TSYM_CARET, l) || accept(SC_REGEX_TSYM_MINUS, l)) {
		struct sc_regex_ptree *result =
		        nonterminal_tree(SC_REGEX_NTSYM_TERM, 1);
		result->children[0] = terminal_tree(l->prev);
		result->production = 1;
		return result;
	} else if (accept(SC_REGEX_TSYM_LPAREN, l)) {
		struct sc_regex_ptree *result = nonterminal_tree(
			SC_REGEX_NTSYM_TERM, 3);
		struct sc_regex_ptree *result =
		        nonterminal_tree(SC_REGEX_NTSYM_TERM, 3);
		result->children[0] = terminal_tree(l->prev);
		result->children[1] = REGEX(l);
		expect(SC_REGEX_TSYM_RPAREN, l);


@@ 105,18 100,18 @@ static struct sc_regex_ptree *TERM(struct sc_regex_lexer *l)
		struct sc_regex_ptree *result;
		if (accept(SC_REGEX_TSYM_CARET, l)) {
			result = nonterminal_tree(SC_REGEX_NTSYM_TERM, 3);
			result->children[0] = terminal_tree(
				(struct sc_regex_token)
				{SC_REGEX_TSYM_LBRACKET, '['});
			result->children[0] =
			        terminal_tree((struct sc_regex_token){
			                SC_REGEX_TSYM_LBRACKET, '[' });
			result->children[1] = CLASS(l);
			expect(SC_REGEX_TSYM_RBRACKET, l);
			result->children[2] = terminal_tree(l->prev);
			result->production = 4;
		} else {
			result = nonterminal_tree(SC_REGEX_NTSYM_TERM, 3);
			result->children[0] = terminal_tree(
				(struct sc_regex_token)
				{SC_REGEX_TSYM_LBRACKET, '['});
			result->children[0] =
			        terminal_tree((struct sc_regex_token){
			                SC_REGEX_TSYM_LBRACKET, '[' });
			result->children[1] = CLASS(l);
			expect(SC_REGEX_TSYM_RBRACKET, l);
			result->children[2] = terminal_tree(l->prev);


@@ 131,20 126,18 @@ static struct sc_regex_ptree *TERM(struct sc_regex_lexer *l)

static struct sc_regex_ptree *EXPR(struct sc_regex_lexer *l)
{
	struct sc_regex_ptree *result = nonterminal_tree(
		SC_REGEX_NTSYM_EXPR, 1);
	struct sc_regex_ptree *result =
	        nonterminal_tree(SC_REGEX_NTSYM_EXPR, 1);
	result->children[0] = TERM(l);
	if (accept(SC_REGEX_TSYM_PLUS, l)
		|| accept(SC_REGEX_TSYM_STAR, l)
		|| accept(SC_REGEX_TSYM_QUESTION, l)
	) {
	if (accept(SC_REGEX_TSYM_PLUS, l) || accept(SC_REGEX_TSYM_STAR, l) ||
	    accept(SC_REGEX_TSYM_QUESTION, l)) {
		result->nchildren++;
		result->children[1] = terminal_tree(l->prev);
		if (accept(SC_REGEX_TSYM_QUESTION, l)) {
			result->nchildren++;
			result->children[2] = terminal_tree(
				(struct sc_regex_token)
				{SC_REGEX_TSYM_QUESTION, '?'});
			result->children[2] =
			        terminal_tree((struct sc_regex_token){
			                SC_REGEX_TSYM_QUESTION, '?' });
		}
	}
	return result;


@@ 155,10 148,10 @@ static struct sc_regex_ptree *SUB(struct sc_regex_lexer *l)
	struct sc_regex_ptree *result = nonterminal_tree(SC_REGEX_NTSYM_SUB, 1);
	struct sc_regex_ptree *orig = result, *prev = result;

	while (l->tok.sym != SC_REGEX_TSYM_EOF
		&& l->tok.sym != SC_REGEX_TSYM_RPAREN
		&& l->tok.sym != SC_REGEX_TSYM_PIPE
	) { /* ^ seems like a bit of a hack */
	while (l->tok.sym != SC_REGEX_TSYM_EOF &&
	       l->tok.sym != SC_REGEX_TSYM_RPAREN &&
	       l->tok.sym !=
	               SC_REGEX_TSYM_PIPE) { /* ^ seems like a bit of a hack */
		result->children[0] = EXPR(l);
		result->children[1] = nonterminal_tree(SC_REGEX_NTSYM_SUB, 0);
		result->nchildren = 2;


@@ 177,8 170,8 @@ static struct sc_regex_ptree *SUB(struct sc_regex_lexer *l)

static struct sc_regex_ptree *REGEX(struct sc_regex_lexer *l)
{
	struct sc_regex_ptree *result = nonterminal_tree(
		SC_REGEX_NTSYM_REGEX, 1);
	struct sc_regex_ptree *result =
	        nonterminal_tree(SC_REGEX_NTSYM_REGEX, 1);
	result->children[0] = SUB(l);

	if (accept(SC_REGEX_TSYM_PIPE, l)) {


@@ 192,14 185,10 @@ static struct sc_regex_ptree *REGEX(struct sc_regex_lexer *l)
static bool CCHAR(struct sc_regex_lexer *l)
{
	enum sc_regex_tsym acceptable[] = {
		SC_REGEX_TSYM_CHAR,
		SC_REGEX_TSYM_DOT,
		SC_REGEX_TSYM_LPAREN,
		SC_REGEX_TSYM_RPAREN,
		SC_REGEX_TSYM_PLUS,
		SC_REGEX_TSYM_STAR,
		SC_REGEX_TSYM_QUESTION,
		SC_REGEX_TSYM_PIPE
		SC_REGEX_TSYM_CHAR,     SC_REGEX_TSYM_DOT,
		SC_REGEX_TSYM_LPAREN,   SC_REGEX_TSYM_RPAREN,
		SC_REGEX_TSYM_PLUS,     SC_REGEX_TSYM_STAR,
		SC_REGEX_TSYM_QUESTION, SC_REGEX_TSYM_PIPE
	};
	for (size_t i = 0; i < nelem(acceptable); i++) {
		if (accept(acceptable[i], l)) {


@@ 214,8 203,8 @@ static struct sc_regex_ptree *CLASS(struct sc_regex_lexer *l)
{
	struct sc_regex_ptree *curr, *prev;
	struct sc_regex_token t1, t2, t3;
	struct sc_regex_ptree *result = nonterminal_tree(
		SC_REGEX_NTSYM_CLASS, 0);
	struct sc_regex_ptree *result =
	        nonterminal_tree(SC_REGEX_NTSYM_CLASS, 0);
	curr = result;

	while (true) {


@@ 231,7 220,7 @@ static struct sc_regex_ptree *CLASS(struct sc_regex_lexer *l)
					curr->children[0] = terminal_tree(t1);
					curr->children[1] = terminal_tree(t3);
					curr->children[2] = nonterminal_tree(
						SC_REGEX_NTSYM_CLASS, 0);
					        SC_REGEX_NTSYM_CLASS, 0);
					curr->nchildren = 3;
					curr->production = 1;
					curr = curr->children[2];


@@ 241,7 230,7 @@ static struct sc_regex_ptree *CLASS(struct sc_regex_lexer *l)
					sc_regex_unget(t2, l);
					curr->children[0] = terminal_tree(t1);
					curr->children[1] = nonterminal_tree(
						SC_REGEX_NTSYM_CLASS, 0);
					        SC_REGEX_NTSYM_CLASS, 0);
					curr->nchildren = 2;
					curr->production = 3;
					curr = curr->children[1];


@@ 250,7 239,7 @@ static struct sc_regex_ptree *CLASS(struct sc_regex_lexer *l)
				/* just a character */
				curr->children[0] = terminal_tree(t1);
				curr->children[1] = nonterminal_tree(
					SC_REGEX_NTSYM_CLASS, 0);
				        SC_REGEX_NTSYM_CLASS, 0);
				curr->nchildren = 2;
				curr->production = 3;
				curr = curr->children[1];


@@ 280,7 269,7 @@ static struct sc_regex_ptree *reparse_internal(struct sc_regex_input input)
	l.input = input;
	l.index = 0;
	l.nbuf = 0;
	l.tok = (struct sc_regex_token){.sym=0, .c=0};
	l.tok = (struct sc_regex_token){ .sym = 0, .c = 0 };

	/* Create a parse tree! */
	/* printf(";; TOKENS:\n"); */


@@ 293,13 282,13 @@ static struct sc_regex_ptree *reparse_internal(struct sc_regex_input input)

static struct sc_regex_ptree *sc_regex_parse(const char *input)
{
	struct sc_regex_input in = {.str=input, .wstr=NULL};
	struct sc_regex_input in = { .str = input, .wstr = NULL };
	return reparse_internal(in);
}

static struct sc_regex_ptree *sc_regex_parsew(const wchar_t *winput)
{
	struct sc_regex_input in = {.str=NULL, .wstr=winput};
	struct sc_regex_input in = { .str = NULL, .wstr = winput };
	return reparse_internal(in);
}


M src/pike.c => src/pike.c +35 -25
@@ 6,11 6,11 @@
 */

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>



@@ 36,11 36,11 @@ struct thread_list {
 */

#ifdef SC_REGEX_DEBUG
static void printthreads(
	struct thread_list *tl, struct sc_regex_instr *prog, size_t nsave)
static void printthreads(struct thread_list *tl, struct sc_regex_instr *prog,
                         size_t nsave)
{
	for (size_t i = 0; i < tl->n; i++) {
		printf("T%zu@pc=%lu{", i, (intptr_t) (tl->t[i].pc - prog));
		printf("T%zu@pc=%lu{", i, (intptr_t)(tl->t[i].pc - prog));
		for (size_t j = 0; j < nsave; j++) {
			printf("%lu,", tl->t[i].saved[j]);
		}


@@ 60,11 60,11 @@ static bool range(struct sc_regex_instr in, wchar_t test)
		return false;
	}
	bool result = false;
	char *block = (char *) in.x;
	char *block = (char *)in.x;

	/* use in.s for number of ranges, in.x as char* for ranges. */
	for (size_t i = 0; i < in.s; i++) {
		if (block[i*2] <= test && test <= block [i*2 + 1]) {
		if (block[i * 2] <= test && test <= block[i * 2 + 1]) {
			result = true;
			break; /* short circuit yo! */
		}


@@ 90,9 90,8 @@ static struct thread_list newthread_list(size_t n)
	return tl;
}

static void addthread(
	struct thread_list *threads, struct sc_regex_instr *pc, size_t *saved,
	size_t nsave, size_t sp)
static void addthread(struct thread_list *threads, struct sc_regex_instr *pc,
                      size_t *saved, size_t nsave, size_t sp)
{
	if (pc->lastidx == sp) {
		/* we've executed this instruction on this string index already


@@ 127,7 126,8 @@ static void addthread(

/**
 * @brief "Stash" a list of captures into the "out" pointer.
 * @param new The new list of captures encountered by the SC_REGEX_CODE_MATCH instruction.
 * @param new The new list of captures encountered by the SC_REGEX_CODE_MATCH
 * instruction.
 * @param destination The out pointer where the caller wants the captures.
 */
void stash(size_t *new, size_t **destination)


@@ 151,7 151,9 @@ void stash(size_t *new, size_t **destination)
	*destination = new;
}

static ssize_t reexec_internal(struct sc_regex *r, const struct sc_regex_input input, size_t **saved)
static ssize_t reexec_internal(struct sc_regex *r,
                               const struct sc_regex_input input,
                               size_t **saved)
{
	/* Can have at most n threads, where n is the length of the program.
	 * This is because (as it is now) the thread state is simply a program


@@ 191,8 193,8 @@ static ssize_t reexec_internal(struct sc_regex *r, const struct sc_regex_input i
		 * printthreads(&curr, r->i, nsave);
		 */

		/* Execute each thread (this will only ever reach instructions that consume
		 * input, since addthread() stops with those).
		/* Execute each thread (this will only ever reach instructions
		 * that consume input, since addthread() stops with those).
		 */
		for (size_t t = 0; t < curr.n; t++) {
			struct sc_regex_instr *pc = curr.t[t].pc;


@@ 201,26 203,34 @@ static ssize_t reexec_internal(struct sc_regex *r, const struct sc_regex_input i
			case SC_REGEX_CODE_CHAR:
				if (sc_regex_input_idx(input, sp) != pc->c) {
					free(curr.t[t].saved);
					break; /* fail, don't continue executing this thread */
					break; /* fail, don't continue executing
					          this thread */
				}
				/* add thread containing the next instruction to the next thread list. */
				addthread(&next, pc+1, curr.t[t].saved, nsave, sp+1);
				/* add thread containing the next instruction to
				 * the next thread list. */
				addthread(&next, pc + 1, curr.t[t].saved, nsave,
				          sp + 1);
				break;
			case SC_REGEX_CODE_ANY:
				if (sc_regex_input_idx(input, sp) == '\0') {
					free(curr.t[t].saved);
					break; /* dot can't match end of string! */
					break; /* dot can't match end of string!
					        */
				}
				/* add thread containing the next instruction to the next thread list. */
				addthread(&next, pc+1, curr.t[t].saved, nsave, sp+1);
				/* add thread containing the next instruction to
				 * the next thread list. */
				addthread(&next, pc + 1, curr.t[t].saved, nsave,
				          sp + 1);
				break;
			case SC_REGEX_CODE_RANGE:
			case SC_REGEX_CODE_NRANGE:
				if (!range(*pc, sc_regex_input_idx(input, sp))) {
				if (!range(*pc,
				           sc_regex_input_idx(input, sp))) {
					free(curr.t[t].saved);
					break;
				}
				addthread(&next, pc+1, curr.t[t].saved, nsave, sp+1);
				addthread(&next, pc + 1, curr.t[t].saved, nsave,
				          sp + 1);
				break;
			case SC_REGEX_CODE_MATCH:
				stash(curr.t[t].saved, saved);


@@ 249,13 259,13 @@ static ssize_t reexec_internal(struct sc_regex *r, const struct sc_regex_input i

ssize_t sc_regex_exec(struct sc_regex *r, const char *input, size_t **saved)
{
	struct sc_regex_input in = {.str=input, .wstr=NULL};
	struct sc_regex_input in = { .str = input, .wstr = NULL };
	return reexec_internal(r, in, saved);
}

ssize_t sc_regex_execw(struct sc_regex *r, const wchar_t *input, size_t **saved)
{
	struct sc_regex_input in = {.str=NULL, .wstr=input};
	struct sc_regex_input in = { .str = NULL, .wstr = input };
	return reexec_internal(r, in, saved);
}


M src/sc-regex-private.h => src/sc-regex-private.h +4 -5
@@ 5,13 5,13 @@
#ifndef SMB_REGEX_REGPARSE_H
#define SMB_REGEX_REGPARSE_H

#include <stdlib.h>
#include <stdbool.h>
#include <stdlib.h>
#include <wchar.h>

#include "sc-regex.h"

#define nelem(x) (sizeof(x)/sizeof((x)[0]))
#define nelem(x) (sizeof(x) / sizeof((x)[0]))

#define SC_REGEX_LEXER_BUFSIZE 4



@@ 37,7 37,7 @@ struct sc_regex_instr {
	wchar_t c;                    /* character */
	size_t s;                     /* slot for "saving" a string index */
	struct sc_regex_instr *x, *y; /* targets for jump and split */
	size_t lastidx;               /* used by VM for fast membership testing */
	size_t lastidx; /* used by VM for fast membership testing */
};

struct sc_regex {


@@ 112,7 112,7 @@ struct sc_regex_ptree {
 * This data structure allows functions to be written to not care whether they
 * are receiving wide character strings or "narrow" (or ascii, aka naive)
 * strings.  Which is useful.
*/
 */
struct sc_regex_input {
	const char *str;
	const wchar_t *wstr;


@@ 156,5 156,4 @@ extern char *sc_regex_tsym_names[];
/* Lookup the name of a non-terminal symbol. */
extern char *sc_regex_ntsym_names[];


#endif // SMB_REGEX_REGPARSE_H