#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/err.h>
#include "gmni.h"
#include "tofu.h"
#include "url.h"
/* From gmni.c */
/* Bundles the TOFU state with the trust policy that tofu_callback() applies. */
struct tofu_config {
/* TOFU state; passed to gemini_tofu_init() and read for known_hosts_path. */
struct gemini_tofu tofu;
/* Policy tofu_callback() starts from when deciding how to treat a certificate. */
enum tofu_action action;
};
/* From gmni.c */
/*
 * Trust-on-first-use callback: report a certificate problem on stderr and
 * decide how to proceed.
 *
 * error       - the kind of certificate problem being reported
 * fingerprint - fingerprint of the certificate the server presented
 * host        - known-hosts entry; only read for TOFU_FINGERPRINT_MISMATCH
 * data        - the struct tofu_config registered with the callback
 *
 * Returns the configured action, with these adjustments:
 *  - an invalid certificate is never trusted permanently (ALWAYS -> ONCE);
 *  - a fingerprint mismatch always fails;
 *  - TOFU_ASK fails, because this program never prompts.
 */
static enum tofu_action
tofu_callback(enum tofu_error error, const char *fingerprint,
	struct known_host *host, void *data)
{
	struct tofu_config *cfg = (struct tofu_config *)data;
	enum tofu_action action = cfg->action;

	switch (error) {
	case TOFU_VALID:
		assert(0); // Invariant
		/* With NDEBUG the assert compiles away; refuse the connection
		   instead of falling through into the invalid-certificate case. */
		return TOFU_FAIL;
	case TOFU_INVALID_CERT:
		fprintf(stderr,
			"The server presented an invalid certificate with fingerprint %s.\n",
			fingerprint);
		if (action == TOFU_TRUST_ALWAYS) {
			action = TOFU_TRUST_ONCE;
		}
		break;
	case TOFU_UNTRUSTED_CERT:
		fprintf(stderr,
			"The certificate offered by this server is of unknown trust. "
			"Its fingerprint is: \n"
			"%s\n\n"
			"Use -j once to trust temporarily, or -j always to add to the trust store.\n", fingerprint);
		break;
	case TOFU_FINGERPRINT_MISMATCH:
		fprintf(stderr,
			"The certificate offered by this server DOES NOT MATCH the one we have on file.\n"
			"/!\\ Someone may be eavesdropping on or manipulating this connection. /!\\\n"
			"The unknown certificate's fingerprint is:\n"
			"%s\n\n"
			"The expected fingerprint is:\n"
			"%s\n\n"
			"If you're certain that this is correct, edit %s:%d\n",
			fingerprint, host->fingerprint,
			cfg->tofu.known_hosts_path, host->lineno);
		return TOFU_FAIL;
	}

	/* There is no interactive prompt here, so "ask" means "refuse". */
	if (action == TOFU_ASK) {
		return TOFU_FAIL;
	}
	return action;
}
/* From gmnlm.c */
/*
 * Skip leading whitespace.
 *
 * Returns a pointer into `in` at its first non-whitespace character (or at
 * the terminating NUL). Nothing is copied or modified.
 */
static char *
trim_ws(char *in)
{
	/* <ctype.h> classifiers need an unsigned char value; a plain char may be
	   negative for bytes >= 0x80, which is undefined behaviour. */
	while (*in && isspace((unsigned char)*in)) {
		++in;
	}
	return in;
}
/*
 * Decide whether a response has to be rejected.
 *
 * A response is usable only if the request itself succeeded (r is
 * GEMINI_OK), the status belongs to the success class, and the meta line
 * begins with "text/gemini". Any other outcome is reported on stderr.
 *
 * Returns true when the response is unusable, false when it can be parsed.
 */
const bool
bad_response(enum gemini_result r, struct gemini_response *resp)
{
	static const char gemtext[] = "text/gemini";

	if (r == GEMINI_OK) {
		if (gemini_response_class(resp->status) == GEMINI_STATUS_CLASS_SUCCESS) {
			/* Prefix comparison, so parameters after the type are accepted. */
			if (strncmp(resp->meta, gemtext, sizeof gemtext - 1) == 0) {
				return false;
			}
			fprintf(stderr, "Error: Response is not text/gemini. Meta: %s\n",
				resp->meta);
		} else {
			fprintf(stderr, "Error: Unsuccessful request. Status: %d %s\n",
				resp->status, resp->meta);
		}
	} else {
		fprintf(stderr, "Error: %s\n", gemini_strerr(r, resp));
	}
	return true;
}
/*
 * Return a copy of `path` up to and including its last '/'.
 *
 * If `path` contains no '/', the result is the empty string. The returned
 * buffer is heap-allocated and owned by the caller (release with free()).
 * Returns NULL if the allocation fails.
 */
char *
strip_filename(const char *path)
{
	const char *last_slash = strrchr(path, '/');
	/* Number of bytes to keep: everything through the final slash. */
	size_t keep = last_slash ? (size_t)(last_slash - path) + 1 : 0;

	char *dir = malloc(keep + 1);
	if (!dir) {
		return NULL;
	}
	memcpy(dir, path, keep);
	dir[keep] = '\0';
	return dir;
}
/*
 * Add scheme and domain to url if needed.
 *
 * `url` counts as absolute when it starts with a non-empty run of characters
 * other than ':' and '/', immediately followed by "://". An absolute url is
 * copied unchanged. Anything else has its leading slashes removed and is
 * appended to `base_url`.
 *
 * The result is always a newly allocated string that the caller must free(),
 * whichever branch was taken. Returns NULL if the allocation fails.
 */
char *
get_absolute_url(char *url, char *base_url)
{
	/* Length of the candidate scheme: everything before the first ':' or '/'. */
	size_t scheme_len = strcspn(url, ":/");
	bool has_scheme = scheme_len > 0
		&& strncmp(url + scheme_len, "://", 3) == 0;

	const char *prefix = "";
	const char *rest = url;
	if (!has_scheme) {
		prefix = base_url;
		/* Remove leading slashes */
		while (*rest == '/') {
			++rest;
		}
	}

	size_t prefix_len = strlen(prefix);
	size_t rest_len = strlen(rest);
	char *abs_url = malloc(prefix_len + rest_len + 1);
	if (!abs_url) {
		return NULL;
	}
	memcpy(abs_url, prefix, prefix_len);
	memcpy(abs_url + prefix_len, rest, rest_len + 1); /* copies the NUL too */
	return abs_url;
}
/*
 * Append path2 to path1, delete path2, rename path1 to path2.
 *
 * The net effect is that path2 ends up holding the old contents of path1
 * followed by the old contents of path2, and path1 no longer exists.
 *
 * path2 is only deleted once the copy has been read, written and flushed
 * without error, so a failed copy leaves the original path2 untouched.
 * A failed delete is reported but not fatal; the rename is still attempted.
 *
 * Returns 0 on success, 1 on failure (a message is printed to stderr).
 */
int
cat_and_rename(char *path1, char *path2)
{
	FILE *dst = fopen(path1, "a");
	if (!dst) {
		fprintf(stderr, "Could not open %s for appending.\n", path1);
		return 1;
	}
	FILE *src = fopen(path2, "r");
	if (!src) {
		fprintf(stderr, "Could not open %s for reading.\n", path2);
		fclose(dst);
		return 1;
	}

	/* Block copy: unlike fgets/fputs this is not confused by NUL bytes. */
	char buf[BUFSIZ];
	size_t got;
	int copied_ok = 1;
	while ((got = fread(buf, 1, sizeof buf, src)) > 0) {
		if (fwrite(buf, 1, got, dst) != got) {
			copied_ok = 0;
			break;
		}
	}
	if (ferror(src)) {
		copied_ok = 0;
	}
	fclose(src);
	/* fclose flushes buffered output, so its result matters for dst. */
	if (fclose(dst) != 0) {
		copied_ok = 0;
	}
	if (!copied_ok) {
		fprintf(stderr, "Could not append %s to %s\n", path2, path1);
		return 1;
	}

	if (remove(path2) != 0) {
		fprintf(stderr, "Could not delete %s\n", path2);
	}
	if (rename(path1, path2) != 0) {
		fprintf(stderr, "Could not rename %s to %s\n", path1, path2);
		return 1;
	}
	return 0;
}
/*
 * Dress an open file up as a successful text/gemini response so that it can
 * be fed to the gemini parser.
 *
 * resp - response to fill in; its previous contents are overwritten
 * fp   - open stream to read from. Ownership passes to this function: on
 *        success the stream is attached to resp->bio with BIO_CLOSE, so
 *        freeing that BIO chain closes it; on failure it is closed here.
 *
 * Returns 0 on success. Returns -1 if a BIO or the meta string could not be
 * allocated; resp then holds no resources (bio and meta are NULL).
 */
int
make_response_from_file(struct gemini_response *resp, FILE *fp)
{
	resp->bio = NULL;
	resp->meta = NULL;
	resp->status = GEMINI_STATUS_SUCCESS;
	resp->fd = -1; /* no socket behind this response */
	resp->ssl = NULL;
	resp->ssl_ctx = NULL;

	BIO *file = BIO_new_fp(fp, BIO_CLOSE);
	if (!file) {
		fclose(fp);
		return -1;
	}
	BIO *buffered = BIO_new(BIO_f_buffer());
	char *meta = strdup("text/gemini");
	if (!buffered || !meta) {
		free(meta);
		BIO_free(buffered);
		BIO_free(file); /* BIO_CLOSE: also closes fp */
		return -1;
	}

	/* Buffering filter in front of the file source. */
	BIO_push(buffered, file);
	resp->bio = buffered;
	resp->meta = meta;
	return 0;
}
/*
 * Report whether `tok` is a heading token of exactly `heading_level`.
 * Tokens of any other type never match.
 */
const bool
token_is_heading(struct gemini_token tok, int heading_level)
{
	/* Short-circuit keeps the heading member from being read for other tokens. */
	return tok.token == GEMINI_HEADING
		&& tok.heading.level == heading_level;
}
/*
 * Report whether `tok` is a link token that carries a usable URL, i.e. a
 * URL pointer that is neither NULL nor the empty string.
 */
const bool
token_is_good_link(struct gemini_token tok)
{
	/* The NULL test must come first: the URL is dereferenced right after. */
	return tok.token == GEMINI_LINK
		&& tok.link.url != NULL
		&& tok.link.url[0] != '\0';
}
int
main(int argc, char *argv[])
{
/* Must have exactly one argument */
if (argc != 2) {
fprintf(stderr, "Usage: gmsfn /path/to/config/file.gmi");
return 1;
}
/* Open config file */
FILE *config_fp = fopen(argv[1], "r");
if (!config_fp) {
fprintf(stderr, "Failed to access config file %s.\n", argv[1]);
return 1;
}
char *config_dir = strip_filename(argv[1]);
/* Setup for using gmni */
struct addrinfo hints = {0};
struct gemini_options opts = {
.hints = &hints,
};
struct tofu_config cfg;
cfg.action = TOFU_ASK;
SSL_load_error_strings();
ERR_load_crypto_strings();
opts.ssl_ctx = SSL_CTX_new(TLS_method());
gemini_tofu_init(&cfg.tofu, opts.ssl_ctx, &tofu_callback, &cfg);
/* Make a gmni response from the config file */
struct gemini_response config_resp;
make_response_from_file(&config_resp, config_fp);
/* Parse config file */
bool absolute = true, good_output_file = false;
char *out_path, *tmp_out_path, **existing_links;
FILE *tmp_out_fp;
int num_existing_links = 0;
struct gemini_parser config_parser;
struct gemini_token config_tok;
gemini_parser_init(&config_parser, config_resp.bio);
while (gemini_parser_next(&config_parser, &config_tok) == 0) {
/* Change output file */
if (token_is_heading(config_tok, 3)) {
/* Clear up from previous output file */
if (good_output_file) {
fclose(tmp_out_fp);
free(existing_links);
cat_and_rename(tmp_out_path, out_path);
free(tmp_out_path);
}
if (!absolute) {
free(out_path);
}
/* Get path to the output file. If it does not start with '/'
assume relative to the directory the config file is in */
char *path = trim_ws(config_tok.heading.title);
if ((absolute = path[0] == '/')) {
out_path = path;
} else {
out_path = malloc(strlen(config_dir) + strlen(path) + 1);
strcpy(out_path, config_dir);
strcat(out_path, path);
}
/* If file doesn't exist then it can't be read from so it says it
is bad. TODO: auto-create output files if needed. */
FILE *count_fp = fopen(out_path, "r");
if (!count_fp) {
printf("Bad output file: %s\n", out_path);
/* Skip links until a new output file is given */
good_output_file = false;
continue;
}
/* TODO: Find a way to re-use file pointer and gemini structures.
It seems a bit silly to close and re-create, but it might be
the only way. */
/* Count number of links in output file */
struct gemini_response count_resp;
struct gemini_parser count_parser;
struct gemini_token count_tok;
make_response_from_file(&count_resp, count_fp);
gemini_parser_init(&count_parser, count_resp.bio);
num_existing_links = 0;
while (gemini_parser_next(&count_parser, &count_tok) == 0) {
if (token_is_good_link(count_tok)) {
++num_existing_links;
}
}
gemini_token_finish(&count_tok);
gemini_parser_finish(&count_parser);
gemini_response_finish(&count_resp); // Closes count_fp
/* Copy link urls in output file into existing_links */
if (num_existing_links > 0) {
/* We know this fopen will work from earlier */
FILE *copy_fp = fopen(out_path, "r");
struct gemini_response copy_resp;
struct gemini_parser copy_parser;
struct gemini_token copy_tok;
int i = 0;
/* Contains pointers to strings */
existing_links = (char **) malloc(num_existing_links * sizeof(char *));
make_response_from_file(©_resp, copy_fp);
gemini_parser_init(©_parser, copy_resp.bio);
while (gemini_parser_next(©_parser, ©_tok) == 0) {
if (token_is_good_link(copy_tok)) {
*(existing_links + i) = copy_tok.link.url;
++i;
}
}
gemini_token_finish(©_tok);
gemini_parser_finish(©_parser);
gemini_response_finish(©_resp); // Closes copy_fp
}
/* Make a temporary output file which new links will be written to.
The existing output file will be appended to it before being
deleted, then the temporary file will be renamed. (TODO)*/
tmp_out_path = malloc(strlen(out_path) + 5);
strcpy(tmp_out_path, out_path);
strcat(tmp_out_path, "-tmp");
tmp_out_fp = fopen(tmp_out_path, "w");
if (!tmp_out_fp) {
fprintf(stderr, "Could not open temporary output file.");
free(tmp_out_path);
good_output_file = false;
}
good_output_file = true;
/* Make requests and output links */
} else if (good_output_file && token_is_good_link(config_tok)) {
/* Get gemlog index page */
struct gemini_response resp;
enum gemini_result r = gemini_request(config_tok.link.url, &opts, &resp);
if (bad_response(r, &resp)) {
fprintf(stderr, "Could not access %s.\n", config_tok.link.url);
gemini_response_finish(&resp);
/* Go to next link in config file */
continue;
}
/* Parse gemlog index page */
char *abs_url, *base_url = strip_filename(config_tok.link.url);
struct gemini_parser p;
struct gemini_token tok;
gemini_parser_init(&p, resp.bio);
while (gemini_parser_next(&p, &tok) == 0) {
if (token_is_good_link(tok)) {
abs_url = get_absolute_url(tok.link.url, base_url);
/* Work out if abs_url is already in the output file. */
bool link_already_exists = false;
for (int i = 0; i < num_existing_links; ++i) {
if (strcmp(abs_url, *(existing_links + i)) == 0) {
link_already_exists = true;
break;
}
}
/* If it isn't then add it to the temporary file */
if (!link_already_exists) {
fprintf(tmp_out_fp, "=> %s", abs_url);
if (config_tok.link.text) {
fprintf(tmp_out_fp, " %s", config_tok.link.text);
}
if (config_tok.link.text && tok.link.text) {
fprintf(tmp_out_fp, ":");
}
if (tok.link.text) {
fprintf(tmp_out_fp, " %s", tok.link.text);
}
fprintf(tmp_out_fp, "\n");
}
free(abs_url);
}
}
gemini_token_finish(&tok);
gemini_parser_finish(&p);
gemini_response_finish(&resp);
}
}
/* Clear up from previous output file */
if (good_output_file) {
fclose(tmp_out_fp);
free(existing_links);
cat_and_rename(tmp_out_path, out_path);
free(tmp_out_path);
}
if (!absolute) {
free(out_path);
}
gemini_token_finish(&config_tok);
gemini_parser_finish(&config_parser);
gemini_response_finish(&config_resp); // Closes config_fp
gemini_tofu_finish(&cfg.tofu);
SSL_CTX_free(opts.ssl_ctx);
return 0;
}