~sircmpwn/scdoc

7f298175918c16b3bc467b47bde976def0650d68 — Drew DeVault 2 years ago bcd942a
Use a more robust approach for in-word-underscores
4 files changed, 35 insertions(+), 20 deletions(-)

M include/util.h
M src/main.c
M src/util.c
M test/inline-formatting
M include/util.h => include/util.h +0 -1
@@ 12,7 12,6 @@ struct parser {
	uint32_t flags;
	const char *str;
	int fmt_line, fmt_col;
	uint32_t last[2];
};

enum formatting {

M src/main.c => src/main.c +9 -7
@@ 163,11 163,6 @@ static void parse_format(struct parser *p, enum formatting fmt) {
		}
		fprintf(p->output, "\\fR");
	} else {
		if (fmt == FORMAT_UNDERLINE && !isspace(p->last[0])) {
			// Ignore underscores in the middle of words
			utf8_fputch(p->output, '_');
			return;
		}
		fprintf(p->output, "\\f%c", formats[fmt]);
		p->fmt_line = p->line;
		p->fmt_col = p->col;


@@ 199,7 194,7 @@ static void parse_linebreak(struct parser *p) {
}

static void parse_text(struct parser *p) {
	uint32_t ch;
	uint32_t ch, last = ' ';
	int i = 0;
	while ((ch = parser_getch(p)) != UTF8_INVALID) {
		switch (ch) {


@@ 217,7 212,13 @@ static void parse_text(struct parser *p) {
			parse_format(p, FORMAT_BOLD);
			break;
		case '_':
			parse_format(p, FORMAT_UNDERLINE);
			if ((p->flags & FORMAT_UNDERLINE)) {
				parse_format(p, FORMAT_UNDERLINE);
			} else if (!p->flags && isspace(last)) {
				parse_format(p, FORMAT_UNDERLINE);
			} else {
				utf8_fputch(p->output, ch);
			}
			break;
		case '+':
			parse_linebreak(p);


@@ 233,6 234,7 @@ static void parse_text(struct parser *p) {
			}
			/* fallthrough */
		default:
			last = ch;
			utf8_fputch(p->output, ch);
			break;
		}

M src/util.c => src/util.c +10 -12
@@ 14,26 14,24 @@ void parser_fatal(struct parser *parser, const char *err) {
}

uint32_t parser_getch(struct parser *parser) {
	uint32_t ch = 0;
	if (parser->qhead) {
		ch = parser->queue[--parser->qhead];
	} else if (parser->str) {
		return parser->queue[--parser->qhead];
	}
	if (parser->str) {
		uint32_t ch = utf8_decode(&parser->str);
		if (!ch || ch == UTF8_INVALID) {
			parser->str = NULL;
			return UTF8_INVALID;
		}
		return ch;
	}
	uint32_t ch = utf8_fgetch(parser->input);
	if (ch == '\n') {
		parser->col = 0;
		++parser->line;
	} else {
		ch = utf8_fgetch(parser->input);
		if (ch == '\n') {
			parser->col = 0;
			++parser->line;
		} else {
			++parser->col;
		}
		++parser->col;
	}
	parser->last[0] = parser->last[1];
	parser->last[1] = ch;
	return ch;
}


M test/inline-formatting => test/inline-formatting +16 -0
@@ 9,6 9,22 @@ _hello *world*_
EOF
end 1

# Case: an underscore in the middle of a plain word ("hello_world") should be
# emitted as literal text rather than toggling underline formatting.
# The pipeline's exit status is grep's; `end 0` presumably asserts that status
# is 0 -- confirm against the begin/end helper definitions.
# NOTE(review): `grep -v 'fR'` exits 0 as soon as ANY output line lacks "fR",
# so if scdoc emits other lines (e.g. a preamble) this check may pass even when
# the paragraph line does contain "\fR" -- TODO confirm and consider asserting
# on the paragraph line itself.
begin "Ignores underscores in words"
scdoc <<EOF | grep -v 'fR' >/dev/null
test(8)

hello_world
EOF
end 0

# Case: an underscore inside a bolded word stays literal. The check passes when
# some output line begins with \fBhello_world\fR, i.e. the word appears between
# the \fB and \fR sequences with its underscore intact.
# The pipeline's exit status is grep's; `end 0` presumably asserts that status
# is 0 -- confirm against the begin/end helper definitions.
begin "Allows underscores in bolded words"
scdoc <<EOF | grep '^\\fBhello_world\\fR' >/dev/null
test(8)

*hello_world*
EOF
end 0

begin "Emits bold text"
scdoc <<EOF | grep '^hello \\fBworld\\fR' >/dev/null
test(8)