~callum/gmsfn

3cf1b2c32f93c215e6b3f470e87caf8d0075461b — Callum Brown 1 year, 3 months ago 3808ce8
Add links to a temp file if not in the output file

Read output file counting number of existing links
Read output file again and store the existing link URLs in a dynamically allocated array of string pointers (char **)
Open a temporary file
For each link in the config file before the next output file heading
    Get the page
    For each link in the page
        If it isn't in the array of existing links
            Add it to the temporary file
1 files changed, 100 insertions(+), 19 deletions(-)

M src/gemlogger.c
M src/gemlogger.c => src/gemlogger.c +100 -19
@@ 231,8 231,9 @@ main(int argc, char *argv[])

	/* Parse config file */
	bool absolute = true, good_output_file = false;
	char *out_path;
	FILE *out_fp = NULL;
	char *out_path, *tmp_out_path, **existing_links;
	FILE *tmp_out_fp;
	int num_existing_links = 0;
	struct gemini_parser config_parser;
	struct gemini_token config_tok;
	gemini_parser_init(&config_parser, config_resp.bio);


@@ 241,10 242,14 @@ main(int argc, char *argv[])
		/* Change output file */
		if (token_is_heading(config_tok, 3)) {
			/* Clear up from previous output file */
			if (out_fp)
				fclose(out_fp);
			if (!absolute)
			if (good_output_file) {
				free(tmp_out_path);
				fclose(tmp_out_fp);
				free(existing_links);
			}
			if (!absolute) {
				free(out_path);
			}

			/* Get path to the output file. If it does not start with '/'
				assume relative to the directory the config file is in */


@@ 257,13 262,71 @@ main(int argc, char *argv[])
				strcat(out_path, path);
			}

			out_fp = fopen(out_path, "w");
			if (!out_fp) {
			/* If file doesn't exist then it can't be read from so it says it
				is bad. TODO: auto-create output files if needed. */
			FILE *count_fp = fopen(out_path, "r");
			if (!count_fp) {
				printf("Bad output file: %s\n", out_path);
				/* Skip links until a new output file is given */
				good_output_file = false;
				continue;
			}

			/* TODO: Find a way to re-use file pointer and gemini structures.
				It seems a bit silly to close and re-create, but it might be
				the only way. */

			/* Count number of links in output file */
			struct gemini_response count_resp;
			struct gemini_parser count_parser;
			struct gemini_token count_tok;
			make_response_from_file(&count_resp, count_fp);
			gemini_parser_init(&count_parser, count_resp.bio);
			num_existing_links = 0;
			while (gemini_parser_next(&count_parser, &count_tok) == 0) {
				if (token_is_good_link(count_tok)) {
					++num_existing_links;
				}
			}
			gemini_token_finish(&count_tok);
			gemini_parser_finish(&count_parser);
			gemini_response_finish(&count_resp); // Closes count_fp

			/* Copy link urls in output file into existing_links */
			if (num_existing_links > 0) {
				/* We know this fopen will work from earlier */
				FILE *copy_fp = fopen(out_path, "r");
				struct gemini_response copy_resp;
				struct gemini_parser copy_parser;
				struct gemini_token copy_tok;
				int i = 0;
				/* Contains pointers to strings */
				existing_links = (char **) malloc(num_existing_links * sizeof(char *));
				make_response_from_file(&copy_resp, copy_fp);
				gemini_parser_init(&copy_parser, copy_resp.bio);
				while (gemini_parser_next(&copy_parser, &copy_tok) == 0) {
					if (token_is_good_link(copy_tok)) {
						*(existing_links + i) = copy_tok.link.url;
						++i;
					}
				}
				gemini_token_finish(&copy_tok);
				gemini_parser_finish(&copy_parser);
				gemini_response_finish(&copy_resp); // Closes copy_fp
			}

			/* Make a temporary output file which new links will be written to.
				The existing output file will be appended to it before being
				deleted, then the temporary file will be renamed. (TODO)*/
			tmp_out_path = malloc(strlen(out_path) + 5);
			strcpy(tmp_out_path, out_path);
			strcat(tmp_out_path, "-tmp");
			tmp_out_fp = fopen(tmp_out_path, "w");
			if (!tmp_out_fp) {
				fprintf(stderr, "Could not open temporary output file.");
				free(tmp_out_path);
				good_output_file = false;
			}
			good_output_file = true;

		/* Make requests and output links */


@@ 287,14 350,28 @@ main(int argc, char *argv[])
			while (gemini_parser_next(&p, &tok) == 0) {
				if (token_is_good_link(tok)) {
					abs_url = get_absolute_url(tok.link.url, base_url);
					fprintf(out_fp, "=> %s", abs_url);
					if (config_tok.link.text)
						fprintf(out_fp, " %s", config_tok.link.text);
					if (config_tok.link.text && tok.link.text)
						fprintf(out_fp, ":");
					if (tok.link.text)
						fprintf(out_fp, " %s", tok.link.text);
					fprintf(out_fp, "\n");
					/* Work out if abs_url is already in the output file. */
					bool link_already_exists = false;
					for (int i = 0; i < num_existing_links; ++i) {
						if (strcmp(abs_url, *(existing_links + i)) == 0) {
							link_already_exists = true;
							break;
						}
					}
					/* If it isn't then add it to the temporary file */
					if (!link_already_exists) {
						fprintf(tmp_out_fp, "=> %s", abs_url);
						if (config_tok.link.text) {
							fprintf(tmp_out_fp, " %s", config_tok.link.text);
						}
						if (config_tok.link.text && tok.link.text) {
							fprintf(tmp_out_fp, ":");
						}
						if (tok.link.text) {
							fprintf(tmp_out_fp, " %s", tok.link.text);
						}
						fprintf(tmp_out_fp, "\n");
					}
					free(abs_url);
				}
			}


@@ 306,14 383,18 @@ main(int argc, char *argv[])
	}

	/* Clear up from previous output file */
	if (out_fp)
		fclose(out_fp);
	if (!absolute)
	if (good_output_file) {
		free(tmp_out_path);
		fclose(tmp_out_fp);
		free(existing_links);
	}
	if (!absolute) {
		free(out_path);
	}

	gemini_token_finish(&config_tok);
	gemini_parser_finish(&config_parser);
	gemini_response_finish(&config_resp);
	gemini_response_finish(&config_resp); // Closes config_fp
	gemini_tofu_finish(&cfg.tofu);
	SSL_CTX_free(opts.ssl_ctx);
	return 0;