~callum/gmsfn

ref: 68930d0a7b1a4ca758b365fe7f61452eeaa92bb9 gmsfn/src/gemlogger.c -rw-r--r-- 6.3 KiB
68930d0aCallum Brown Make config file path command-line argument 1 year, 7 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/err.h>
#include "gmni.h"
#include "tofu.h"
#include "url.h"

/*
 * From gmni.c.
 *
 * Bundle passed as the user-data pointer when registering tofu_callback():
 * the callback casts its `data` argument back to this type.
 */
struct tofu_config {
	struct gemini_tofu tofu; /* TOFU state; tofu_callback() reads tofu.known_hosts_path */
	enum tofu_action action; /* action tofu_callback() starts from (main() sets TOFU_ASK) */
};

/* From gmni.c */
static enum tofu_action
tofu_callback(enum tofu_error error, const char *fingerprint,
	struct known_host *host, void *data)
{
	struct tofu_config *cfg = (struct tofu_config *)data;
	enum tofu_action action = cfg->action;
	switch (error) {
	case TOFU_VALID:
		assert(0); // Invariant
	case TOFU_INVALID_CERT:
		fprintf(stderr,
			"The server presented an invalid certificate with fingerprint %s.\n",
			fingerprint);
		if (action == TOFU_TRUST_ALWAYS) {
			action = TOFU_TRUST_ONCE;
		}
		break;
	case TOFU_UNTRUSTED_CERT:
		fprintf(stderr,
			"The certificate offered by this server is of unknown trust. "
			"Its fingerprint is: \n"
			"%s\n\n"
			"Use -j once to trust temporarily, or -j always to add to the trust store.\n", fingerprint);
		break;
	case TOFU_FINGERPRINT_MISMATCH:
		fprintf(stderr,
			"The certificate offered by this server DOES NOT MATCH the one we have on file.\n"
			"/!\\ Someone may be eavesdropping on or manipulating this connection. /!\\\n"
			"The unknown certificate's fingerprint is:\n"
			"%s\n\n"
			"The expected fingerprint is:\n"
			"%s\n\n"
			"If you're certain that this is correct, edit %s:%d\n",
			fingerprint, host->fingerprint,
			cfg->tofu.known_hosts_path, host->lineno);
		return TOFU_FAIL;
	}

	if (action == TOFU_ASK) {
		return TOFU_FAIL;
	}

	return action;
}


/*
 * Decide whether a response is unusable.
 *
 * r:    result code returned by the request
 * resp: the response that request filled in
 *
 * Returns true (after printing the reason to stderr) when the request
 * failed, when the status is not in the success class, or when the meta
 * field does not begin with "text/gemini". Returns false otherwise.
 */
const bool
bad_response(enum gemini_result r, struct gemini_response *resp)
{
	static const char gemtext_mime[] = "text/gemini";
	bool unusable = true;

	if (r != GEMINI_OK) {
		/* The request itself failed */
		fprintf(stderr, "Error: %s\n", gemini_strerr(r, resp));
	} else if (gemini_response_class(resp->status) != GEMINI_STATUS_CLASS_SUCCESS) {
		/* The server answered, but not with a success status */
		fprintf(stderr, "Error: Unsuccessful request. Status: %d %s\n",
			resp->status, resp->meta);
	} else if (strncmp(resp->meta, gemtext_mime, sizeof(gemtext_mime) - 1) != 0) {
		/* Only a prefix match, so trailing parameters in meta are accepted */
		fprintf(stderr, "Error: Response is not text/gemini. Meta: %s\n",
			resp->meta);
	} else {
		unusable = false;
	}

	return unusable;
}


/*
 * Determine whether the given token is a link and has a url.
 *
 * Returns true only when tok is a GEMINI_LINK token whose url is non-NULL
 * and non-empty. The NULL test comes first so the url is never dereferenced
 * when it is missing.
 */
const bool
token_is_good_link(struct gemini_token tok)
{
	if (tok.token != GEMINI_LINK) {
		return false;
	}
	return tok.link.url != NULL && tok.link.url[0] != '\0';
}


/*
 * Return url up to and including the last '/'.
 *
 * url: NUL-terminated string; must not be NULL.
 *
 * Returns a newly allocated string that the caller must free(). If url
 * contains no '/', the result is an empty string. Returns NULL if the
 * allocation fails.
 */
char *
get_url_without_filename(const char *url)
{
	const char *last_slash = strrchr(url, '/');
	/* Keep the slash itself; with no slash at all, keep nothing */
	size_t prefix_len = (last_slash != NULL) ? (size_t)(last_slash - url) + 1 : 0;

	char *prefix = malloc(prefix_len + 1);
	if (prefix == NULL) {
		return NULL;
	}
	memcpy(prefix, url, prefix_len);
	prefix[prefix_len] = '\0';
	return prefix;
}


/*
 * Add scheme and domain to url if needed.
 *
 * url:      link target as written in the document; must not be NULL.
 * base_url: prefix to prepend to a relative url (expected to end in '/');
 *           must not be NULL.
 *
 * A url is treated as absolute only if it starts with "<scheme>://", i.e.
 * the first ':' is not the first character and is immediately followed by
 * two slashes. Everything else is treated as relative: its leading slashes
 * are removed and it is appended to base_url. Note that this means
 * root-relative paths ("/x") and schemes without "//" ("mailto:x") are also
 * resolved against base_url.
 *
 * Returns a newly allocated string in every case (a copy of url when it is
 * already absolute), so the caller can always free() the result. Returns
 * NULL if the allocation fails.
 */
char *
get_absolute_url(char *url, char *base_url)
{
	bool not_absolute = false, has_scheme = false;
	bool colon = false, slash = false;
	const size_t url_len = strlen(url);

	for (size_t i = 0; i < url_len && !not_absolute && !has_scheme; ++i) {
		switch (url[i]) {
		case ':':
			if (i == 0 || colon) {
				/* If colon is first char then no scheme */
				/* 2 colons but no slashes means no scheme */
				not_absolute = true;
			} else {
				/* This is the first colon */
				colon = true;
			}
			break;
		case '/':
			if (colon && slash) {
				/* Last 2 chars were ":/" so url has scheme. Assuming there
					is a valid domain after, url is absolute. */
				has_scheme = true;
			} else if (colon) {
				/* This slash directly follows a colon */
				slash = true;
			} else {
				/* This slash is before any colon, so no scheme */
				not_absolute = true;
			}
			break;
		default:
			/* If there has been a colon but this character is not a slash,
				then the url is not absolute. */
			if (colon) {
				not_absolute = true;
			}
			break;
		}
	}

	const char *prefix = "";
	const char *rest = url;
	if (!has_scheme) {
		prefix = base_url;
		/* remove leading slashes */
		while (*rest == '/') {
			++rest;
		}
	}

	const size_t prefix_len = strlen(prefix);
	const size_t rest_len = strlen(rest);
	char *abs_url = malloc(prefix_len + rest_len + 1);
	if (abs_url == NULL) {
		return NULL;
	}
	memcpy(abs_url, prefix, prefix_len);
	memcpy(abs_url + prefix_len, rest, rest_len + 1); /* includes the NUL */
	return abs_url;
}


int
main(int argc, char *argv[])
{
	/* Must have exactly one argument */
	if (argc != 2) {
		fprintf(stderr, "Usage: gemlogger /path/to/config/file.gmi");
		return 1;
	}

	/* Open config file */
	FILE *config_fp = fopen(argv[1], "r");
	if (!config_fp) {
		fprintf(stderr, "Failed to access config file %s.\n", argv[1]);
		return 1;
	}

	/* Setup for using gmni */
	struct addrinfo hints = {0};
	struct gemini_options opts = {
		.hints = &hints,
	};
	struct tofu_config cfg;
	cfg.action = TOFU_ASK;
	SSL_load_error_strings();
	ERR_load_crypto_strings();
	opts.ssl_ctx = SSL_CTX_new(TLS_method());
	gemini_tofu_init(&cfg.tofu, opts.ssl_ctx, &tofu_callback, &cfg);

	/* Make a gmni response from the config file */
	BIO *file = BIO_new_fp(config_fp, BIO_CLOSE);
	struct gemini_response config_resp;
	config_resp.bio = BIO_new(BIO_f_buffer());
	BIO_push(config_resp.bio, file);
	config_resp.meta = strdup("text/gemini");
	config_resp.status = GEMINI_STATUS_SUCCESS;
	config_resp.fd = -1;
	config_resp.ssl = NULL;
	config_resp.ssl_ctx = NULL;

	/* Parse config file */
	struct gemini_parser config_parser;
	gemini_parser_init(&config_parser, config_resp.bio);
	struct gemini_token config_tok;
	while (gemini_parser_next(&config_parser, &config_tok) == 0) {
		if (token_is_good_link(config_tok)) {
			/* Get gemlog index page */
			struct gemini_response resp;
			enum gemini_result r = gemini_request(config_tok.link.url, &opts, &resp);
			if (bad_response(r, &resp)) {
				fprintf(stderr, "Could not access %s.\n", config_tok.link.url);
				/* Go to next link in config file */
				continue;
			}

			char *base_url = get_url_without_filename(config_tok.link.url);
			char *abs_url;
			FILE *out = stdout;

			/* Parse gemlog index page */
			struct gemini_parser p;
			gemini_parser_init(&p, resp.bio);
			struct gemini_token tok;
			while (gemini_parser_next(&p, &tok) == 0) {
				if (token_is_good_link(tok)) {
					abs_url = get_absolute_url(tok.link.url, base_url);
					fprintf(out, "link text: %s\n", tok.link.text);
					fprintf(out, "link url: %s\n", tok.link.url);
					fprintf(out, "abs  url: %s\n", abs_url);
					fprintf(out, "\n");
					free(abs_url);
				}
			}
			gemini_token_finish(&tok);
			gemini_parser_finish(&p);
			gemini_response_finish(&resp);
		}
	}

	gemini_token_finish(&config_tok);
	gemini_parser_finish(&config_parser);
	gemini_response_finish(&config_resp);
	gemini_tofu_finish(&cfg.tofu);
	SSL_CTX_free(opts.ssl_ctx);
	return 0;
}