#include <getopt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "misc.h"
// Number of bytes htmlencode() emits for one escaped input byte, i.e. the
// length of "&#xHH;" without the terminating NUL. Output buffers are sized as
// input length * ESCAPED_REPL_LEN + 1.
#define ESCAPED_REPL_LEN 6
// Pass-through table indexed by byte value: a non-zero entry means the byte is
// copied to the output unchanged, a zero entry means it gets escaped.
// Initially only " ' & < and > are escaped. The table is deliberately not
// const: main() sets some of those entries to 1 when -a or -e is given.
#define UNRESERVED_ROW_ALL 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
static unsigned char unreserved[256] =
{
    UNRESERVED_ROW_ALL,                             // 0x00 - 0x0F
    UNRESERVED_ROW_ALL,                             // 0x10 - 0x1F
    1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 0x2F: " & ' escaped
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // 0x30 - 0x3F: < > escaped
    UNRESERVED_ROW_ALL,                             // 0x40 - 0x4F
    UNRESERVED_ROW_ALL,                             // 0x50 - 0x5F
    UNRESERVED_ROW_ALL,                             // 0x60 - 0x6F
    UNRESERVED_ROW_ALL,                             // 0x70 - 0x7F
    UNRESERVED_ROW_ALL,                             // 0x80 - 0x8F
    UNRESERVED_ROW_ALL,                             // 0x90 - 0x9F
    UNRESERVED_ROW_ALL,                             // 0xA0 - 0xAF
    UNRESERVED_ROW_ALL,                             // 0xB0 - 0xBF
    UNRESERVED_ROW_ALL,                             // 0xC0 - 0xCF
    UNRESERVED_ROW_ALL,                             // 0xD0 - 0xDF
    UNRESERVED_ROW_ALL,                             // 0xE0 - 0xEF
    UNRESERVED_ROW_ALL                              // 0xF0 - 0xFF
};
#undef UNRESERVED_ROW_ALL
// HTML-encode the first `len` bytes of `in` into `out` and NUL-terminate it.
//
// Bytes whose entry in `unreserved` is non-zero are copied unchanged; every
// other byte is written as an uppercase hexadecimal character reference
// "&#xHH;", which is exactly ESCAPED_REPL_LEN bytes long.
//
// No bounds checking is done on `out`: the caller must provide room for at
// least len * ESCAPED_REPL_LEN + 1 bytes. `in` and `out` are restrict-qualified
// and therefore must not overlap.
void htmlencode(const char *restrict in, char *restrict out, size_t len)
{
    for (size_t i = 0; i < len; ++i)
    {
        // Always work on the unsigned byte value: plain char may be signed,
        // and a negative value handed to %X would sign-extend and print more
        // than two hex digits, overrunning the ESCAPED_REPL_LEN bytes that
        // callers budget per input byte.
        const unsigned char c = (unsigned char)in[i];

        if (unreserved[c])
        {
            *(out++) = in[i];
        }
        else
        {
            // sprintf also stores a NUL right after the ';'. It stays within
            // the documented capacity and is overwritten by the next byte or
            // by the final terminator below.
            sprintf(out, "&#x%02X;", (unsigned int)c);
            out += ESCAPED_REPL_LEN;
        }
    }
    *out = '\0';
}
// Print the help text to stdout and terminate the process with `exit_status`.
// This function does not return: it always ends in exit().
// PROG_NAME is substituted twice (NAME and SYNOPSIS sections); it and NORETURN
// are not defined in this file, presumably they come from misc.h or the build.
NORETURN void usage(int exit_status)
{
    printf(
        "NAME\n"
        " %s - HTML encode strings\n"
        "\n"
        "SYNOPSIS\n"
        // The operands are arbitrary strings to encode, not URLs.
        " %s [OPTIONS] [STRING...]\n"
        "\n"
        "DESCRIPTION\n"
        " HTML encode the arguments and print them to stdout separated by newlines.\n"
        " If there is no argument or the only argument is '-', stdin is read as a \n"
        " newline separated argument list.\n"
        "\n"
        "OPTIONS\n"
        " -a\n"
        " Only encode \" and ', to be used as attribute value.\n"
        "\n"
        " -e\n"
        " Only encode <, > and &, to be used as element content.\n"
        "\n"
        " -h\n"
        " Print this help message and exit.\n"
        "\n"
        " -v\n"
        " Print the version and exit.\n"
        "\n",
        PROG_NAME, PROG_NAME);
    exit(exit_status);
}
// Encode the first `len` bytes of `s` with htmlencode() and print the result
// followed by a newline (puts). `*buf` / `*buflen` describe a scratch buffer
// owned by the caller; it is grown here on demand and reused across calls.
static void encode_and_print(const char *s, size_t len,
                             char **buf, size_t *buflen)
{
    // Reserve ESCAPED_REPL_LEN bytes per input byte plus the NUL, so that
    // htmlencode() never has to check the output size. Refuse inputs for
    // which that product would not fit in a size_t.
    const size_t max_len = (SIZE_MAX - 1) / ESCAPED_REPL_LEN;
    if (len > max_len)
    {
        die("Input string is too big (lim: %zu)", max_len);
    }

    // All arithmetic is done in size_t: the guard above is relative to
    // SIZE_MAX, so a signed multiplication could still overflow.
    const size_t needed = len * ESCAPED_REPL_LEN + 1;
    if (*buflen < needed)
    {
        *buf = xrealloc(*buf, needed);
        *buflen = needed;
    }

    htmlencode(s, *buf, len);
    puts(*buf);
}

// Entry point: parse the options, then HTML-encode either each command-line
// operand or, when there is no operand or the only one is "-", each line read
// from stdin. One encoded string is printed per line.
int main(int argc, char **argv)
{
    signals_nointerrupt();

    for (int opt; (opt = getopt(argc, argv, "aehv")) != -1;)
    {
        switch (opt)
        {
        case 'a':
            // Attribute mode: leave & < > alone, only " and ' stay escaped.
            unreserved['&'] = 1;
            unreserved['<'] = 1;
            unreserved['>'] = 1;
            break;
        case 'e':
            // Element mode: leave quotes alone, only & < > stay escaped.
            unreserved['"'] = 1;
            unreserved['\''] = 1;
            break;
        case 'h':
            usage(EXIT_SUCCESS);
            break;
        case 'v':
            puts(PROG_VERSION);
            exit(EXIT_SUCCESS);
        default:
            // Unknown option; exit with failure without printing the usage.
            exit(EXIT_FAILURE);
        }
    }

    // Skip past the options so argv[0] is the first operand.
    argc -= optind;
    argv += optind;

    char *buf = NULL;
    size_t buflen = 0;

    if (argc == 0 || (argc == 1 && !strcmp(argv[0], "-")))
    {
        char *line = NULL;
        size_t linealloc = 0;

        // NOTE(review): xgetline is a project helper, presumably
        // getline-like (returns the line length, -1 at end of input).
        // Whether the returned line still carries its trailing newline is
        // not visible from here; confirm against its definition.
        for (ssize_t nread;
             (nread = xgetline(&line, &linealloc, stdin)) != -1; )
        {
            encode_and_print(line, (size_t)nread, &buf, &buflen);
        }
        free(line);
    }
    else
    {
        for (int i = 0; i < argc; ++i)
        {
            encode_and_print(argv[i], strlen(argv[i]), &buf, &buflen);
        }
    }

    free(buf);
    return EXIT_SUCCESS;
}