~tachi/miniman

16d04d0f47815cd8471b114cce8cd750c84435d5 — Andrea Pappacoda 2 years ago 232b85c main
some dumb changes I made a few days ago
2 files changed, 145 insertions(+), 61 deletions(-)

M .gitignore
M miniman.c
M .gitignore => .gitignore +1 -0
@@ 1,2 1,3 @@
/miniman
*.o
compile_commands.json

M miniman.c => miniman.c +144 -61
@@ 33,7 33,9 @@ static void parse_header(FILE* file) {
		/* TODO: I should handle lines with any kind of whitespace
		   ...or maybe not, trailing whitespace is bad anyway. */
		case '\n':
			puts(".");
			/* No, inserting dots is dumb. They can be used to increase
			   readability, but here the manpage is generated so who cares */
			/*puts(".");*/
			break;
		default:
			strcpy(buffer, line);


@@ 67,25 69,45 @@ static void parse_header(FILE* file) {
	}
}

/* Handle comments, (sub)section headers and empty lines */
static void parse_trivial(FILE* file) {
	char line[LINE_STRLEN];
	fpos_t fpos;
	int newlines = 0;

	while (1) {
		fgetpos(file, &fpos);
		fgets(line, LINE_STRLEN, file);

		/* Handle comments, section headers and empty lines */
		if (strcmp(line, "!exit!\n") == 0) {
			exit(107);
		}

		switch (line[0]) {
		case ';':
			printf(".\\\"%s", line + 1);
			newlines = 0;
			continue;
		/* FIXME: what about subsections? i.e. ## */
		case '#':
			printf(".Sh%s", line + 1);
			if (line[1] == '#') {
				printf(".Ss%s", line + 2);
			}
			else {
				printf(".Sh%s", line + 1);
			}
			newlines = 0;
			continue;
		case '\n':
			puts(".");
			newlines += 1;
			/* I should only do this if I'm in a text section, but I don't know
			   if it causes problems. */
			if (newlines > 1) {
				puts(".Pp");
				newlines = 0;
			}
			else {
				putchar('\n');
			}
			continue;
		}
		/* 


@@ 98,57 120,79 @@ static void parse_trivial(FILE* file) {
	}
}

int main(const int argc, const char* argv[]) {
	FILE* file;
	int i = 100;
	int c1, c2, c3; /* character + EOF */
	int is_top_level; /* _Bool */

	(void)argc;

	fprintf(stderr,
		"Usage: %s [file] [-o output]\n"
		"If no file is given, or if file is -, reads from stdin\n"
		"Currently only reading from a file is supported\n\n",
		argv[0]
	);

	file = fopen(argv[1], "rb");
	if (file == NULL) {
		return EXIT_FAILURE;
	}

	parse_header(file);
	parse_trivial(file);
/*
 * How can I handle "complex lines"?
 *
 * I could loop over all the text, char by char, and keep track of when I
 * encounter an uppercase character. If in the next iteration I encounter
 * a lowercase character, I keep track of that too. If I then encounter
 * an opening parenthesis, it means that I've encountered a function.
 *
 * Now, how do I handle the functions? As I need to translate to mdoc, I have
 * to follow the rules of the MACRO SYNTAX section of OpenBSD's mdoc(7)
 * manual. If a macro is callable, I can simply translate it in place (?),
 * if it is not callable I have a problem: to make the cloudflare-ddns(1)
 * example work I need to insert the macro on a new line when it is not
 * callable, while mdoc allows you to write non-callable macros inside
 * other macros, and they'll be simply written out as simple text.
 * One way I could handle this is that if the non-callable function is not
 * inside any other function I can write it on a new line, otherwise I just
 * copy its text. Anyway I think that this will not happen as much as it
 * happens in mdoc, as miniman is a bit more explicit.
 * Wait, maybe I'm wrong. Maybe I should insert a new macro line (\n + .)
 * every time a top-level function is opened. Maybe I don't even need to worry
 * about syntax and grammar, because that is already handled by mdoc.
 *
 * I have to keep track of some more state:
 *
 * - I have to know if I'm in a function, so that I can tell if once I encounter
 *   a function I have to create a new macro line or not. This is quite easy to
 *   figure out. For example,
 *
 *       Nm() Op(Ar(api_token record_name))
 *
 *   should be translated to
 *
 *       .Nm
 *       .Op Ar api_token record_name
 *
 *   and not
 *
 *       .Nm Op Ar api_token record_name
 *
 *       .Nm
 *       .Op
 *       .Ar api_token record_name
 *
 * - I have to know if I'm in a "text section" or not, so that I can properly
 *   handle empty lines. Empty lines between macro lines can be simply ignored,
 *   while empty lines in text sections should be converted to .Pp. To figure
 *   out if I'm in a text section or not I could check the type of the previous
 *   line. For example,
 *
 *       # DESCRIPTION
 *
 *       Nm() is a tool.
 *
 *       This tool is written in C.
 *
 *   should be translated to
 *
 *       .Sh DESCRIPTION
 *       .Nm()
 *       is a tool.
 *       .Pp
 *       This tool is written in C.
 */

	/*
	 * How can I handle "complex lines"?
	 *
	 * I could loop over all the text, char by char, and keep track of when I
	 * encounter an uppercase character. If in the next iteration I encounter
	 * a lowercase character, I keep track of that too. If I then encounter
	 * an opening parenthesis, it means that I've encountered a function.
	 *
	 * Now, how do I handle the functions? As I need to translate to mdoc, I have
	 * to follow the rules of the MACRO SYNTAX section of OpenBSD's mdoc(7)
	 * manual. If a macro is callable, I can simply translate it in place (?),
	 * if it is not callable I have a problem: to make the cloudflare-ddns(1)
	 * example work I need to insert the macro on a new line when it is not
	 * callable, while mdoc allows you to write non-callable macros inside
	 * other macros, and they'll be simply written out as simple text.
	 * One way I could handle this is that if the non-callable function is not
	 * inside any other function I can write it on a new line, otherwise I just
	 * copy its text. Anyway I think that this will not happen as much as it
	 * happens in mdoc, as miniman is a bit more explicit.
	 * Wait, maybe I'm wrong. Maybe I should insert a new macro line (\n + .)
	 * every time a top-level function is opened.
	 */
static void parse_complex(FILE* file, const int is_top_level) {
	char c1, c2, c3; /* int = char + EOF */

	is_top_level = 1;
	do {
		if (is_top_level) {
			parse_trivial(file);
		}

	/* recursion? */
	while(i --> 0) {
		parse_trivial(file);
		c1 = getc(file);
		if (isupper(c1)) {
			c2 = getc(file);


@@ 157,28 201,67 @@ int main(const int argc, const char* argv[]) {
				if (c3 == '(') { /* this is an opening function */
					if (is_top_level) {
						fputs("\n.", stdout);
						is_top_level = 0;
					}
					putchar(c1);
					putchar(c2);
					putchar(' ');
					while ((c1 = getc(file)) != ')') {
						if (c1 == '(') {
							c1 = ' ';
						}
						putchar(c1);
					while ((c1 = getc(file)) != ')') { /*should I recurse here?*/
						ungetc(c1, file);
						parse_complex(file, 0);
					}
					putchar(' ');
					is_top_level = 1;
					if (is_top_level) {
						putchar('\n');
					}
					else {
						putchar(' ');
					}
				}
				else {
					putchar(c1);
					putchar(c2);
					putchar(c3);
				}
			}
			else {
				putchar(c1);
				putchar(c2);
			}
		}
		else {
			putchar(c1);
		}

		/*if (!isupper(c1)) {
			putchar(c1);
		}*/
		c1 = getc(file);
		ungetc(c1, file);
	} while (is_top_level && c1 != EOF);
}

/*
 * Program entry point.
 *
 * Expects exactly one command-line argument: the path of the input file.
 * With any other argument count the usage text is written to stderr and the
 * program fails. The file is opened in binary mode, passed to parse_header()
 * and then to parse_complex() as a top-level call, and closed before
 * returning.
 *
 * Returns EXIT_FAILURE on a usage error or if the file cannot be opened,
 * 0 otherwise.
 */
int main(const int argc, const char* argv[]) {
	const char* path;
	FILE* input;

	if (argc != 2) {
		/* Only the first line needs formatting; the rest is fixed text. */
		fprintf(stderr, "Usage: %s [file] [-o output]\n", argv[0]);
		fputs(
			"If no file is given, or if file is -, reads from stdin\n"
			"Currently only reading from a file is supported\n\n",
			stderr
		);
		return EXIT_FAILURE;
	}

	path = argv[1];
	input = fopen(path, "rb");
	if (!input) {
		fprintf(stderr, "Failed to open %s\n", path);
		return EXIT_FAILURE;
	}

	parse_header(input);
	/* No separate parse_trivial() call here: parse_complex() already
	   invokes it when running at top level. */
	parse_complex(input, 1);

	fclose(input);
	return 0;
}