~emersion/mrsh

2a9e336e1c577494a32d35add8fda052a371dce4 — Simon Ser 1 year, 13 days ago 308a3c6
shell/word: preserve AST in split_fields

Instead of converting fields to strings, keep them as words. This is necessary
for preventing pathname expansion from expanding quoted words.

References: https://github.com/emersion/mrsh/issues/24
5 files changed, 70 insertions(+), 22 deletions(-)

M ast.c
M builtin/read.c
M include/shell/word.h
M shell/task/simple_command.c
M shell/word.c
M ast.c => ast.c +3 -1
@@ 334,7 334,9 @@ struct mrsh_word_list *mrsh_word_list_create(struct mrsh_array *children,
	struct mrsh_word_list *wl = calloc(1, sizeof(struct mrsh_word_list));
	wl->word.node.type = MRSH_NODE_WORD;
	wl->word.type = MRSH_WORD_LIST;
	wl->children = *children;
	if (children != NULL) {
		wl->children = *children;
	}
	wl->double_quoted = double_quoted;
	return wl;
}

M builtin/read.c => builtin/read.c +8 -0
@@ 60,6 60,14 @@ int builtin_read(struct mrsh_state *state, int argc, char *argv[]) {
	split_fields(&fields, &ws->word, mrsh_env_get(state, "IFS", NULL));
	mrsh_word_destroy(&ws->word);

	struct mrsh_array strs = {0};
	get_fields_str(&strs, &fields);
	for (size_t i = 0; i < fields.len; ++i) {
		mrsh_word_destroy(fields.data[i]);
	}
	mrsh_array_finish(&fields);
	fields = strs;

	if (fields.len <= (size_t)(argc - mrsh_optind)) {
		for (size_t i = 0; i < fields.len; ++i) {
			mrsh_env_set(state, argv[mrsh_optind + i], (char *)fields.data[i], MRSH_VAR_ATTRIB_NONE);

M include/shell/word.h => include/shell/word.h +2 -0
@@ 22,6 22,8 @@ void expand_tilde(struct mrsh_state *state, char **str_ptr);
 */
void split_fields(struct mrsh_array *fields, struct mrsh_word *word,
	const char *ifs);
/**
 * Converts each word in `fields_word` to a string with `mrsh_word_str` and
 * appends the resulting strings to `fields_str`, preserving order.
 */
void get_fields_str(struct mrsh_array *fields_str,
	struct mrsh_array *fields_word);
/**
 * Performs pathname expansion on each item in `fields`.
 */

M shell/task/simple_command.c => shell/task/simple_command.c +8 -0
@@ 231,6 231,14 @@ static void get_args(struct mrsh_array *args, struct mrsh_simple_command *sc,
	}
	assert(fields.len > 0);

	struct mrsh_array strs = {0};
	get_fields_str(&strs, &fields);
	for (size_t i = 0; i < fields.len; ++i) {
		mrsh_word_destroy(fields.data[i]);
	}
	mrsh_array_finish(&fields);
	fields = strs;

	if (ctx->state->options & MRSH_OPT_NOGLOB) {
		*args = fields;
	} else {

M shell/word.c => shell/word.c +49 -21
@@ 57,49 57,76 @@ void expand_tilde(struct mrsh_state *state, char **str_ptr) {
}

/**
 * State threaded through the recursive field-splitting walk.
 */
struct split_fields_data {
	// Output array; every newly started field is appended here
	struct mrsh_array *fields;
	// Field currently being assembled, or NULL if the next piece starts a new
	// field
	struct mrsh_word_list *cur_field;
	// Characters acting as field separators
	const char *ifs;
	// Separator characters given special treatment when repeated; presumably
	// the non-whitespace subset of `ifs` (TODO confirm against split_fields)
	const char *ifs_non_space;
	// True while the most recently processed character was a separator
	bool in_ifs;
	// True once a character of `ifs_non_space` has been seen inside the current
	// run of separators
	bool in_ifs_non_space;
};

static void _split_fields(struct mrsh_array *fields, struct mrsh_buffer *buf,
		struct mrsh_word *word, bool double_quoted,
		struct split_fields_data *data) {
/**
 * Appends `word` to the field under construction. If no field is open, a
 * fresh, unquoted word list is created, recorded as the current field and
 * added to the output array before `word` is attached to it.
 */
static void add_to_cur_field(struct split_fields_data *data,
		struct mrsh_word *word) {
	const bool has_open_field = data->cur_field != NULL;
	if (!has_open_field) {
		// Start a new field and publish it in the output right away
		data->cur_field = mrsh_word_list_create(NULL, false);
		mrsh_array_add(data->fields, data->cur_field);
	}

	mrsh_array_add(&data->cur_field->children, word);
}

static void _split_fields(struct split_fields_data *data,
		struct mrsh_word *word) {
	switch (word->type) {
	case MRSH_WORD_STRING:;
		struct mrsh_word_string *ws = mrsh_word_get_string(word);

		if (double_quoted || ws->single_quoted) {
			mrsh_buffer_append(buf, ws->str, strlen(ws->str));
		if (ws->single_quoted) {
			add_to_cur_field(data, mrsh_word_copy(word));
			data->in_ifs = data->in_ifs_non_space = false;
			return;
		}

		struct mrsh_buffer buf = {0};
		size_t len = strlen(ws->str);
		for (size_t i = 0; i < len; ++i) {
			char c = ws->str[i];
			if (strchr(data->ifs, c) == NULL) {
				mrsh_buffer_append_char(buf, c);
				mrsh_buffer_append_char(&buf, c);
				data->in_ifs = data->in_ifs_non_space = false;
				continue;
			}

			bool is_ifs_non_space = strchr(data->ifs_non_space, c) != NULL;
			if (!data->in_ifs || (is_ifs_non_space && data->in_ifs_non_space)) {
				mrsh_buffer_append_char(buf, '\0');
				char *str = mrsh_buffer_steal(buf);
				mrsh_array_add(fields, str);
				mrsh_buffer_append_char(&buf, '\0');
				char *str = mrsh_buffer_steal(&buf);
				add_to_cur_field(data,
					&mrsh_word_string_create(str, false)->word);
				data->cur_field = NULL;
				data->in_ifs = true;
			} else if (is_ifs_non_space) {
				data->in_ifs_non_space = true;
			}
		}

		if (!data->in_ifs) {
			mrsh_buffer_append_char(&buf, '\0');
			char *str = mrsh_buffer_steal(&buf);
			add_to_cur_field(data,
				&mrsh_word_string_create(str, false)->word);
		}

		mrsh_buffer_finish(&buf);
		break;
	case MRSH_WORD_LIST:;
		struct mrsh_word_list *wl = mrsh_word_get_list(word);

		if (wl->double_quoted) {
			add_to_cur_field(data, mrsh_word_copy(word));
			return;
		}

		for (size_t i = 0; i < wl->children.len; ++i) {
			struct mrsh_word *child = wl->children.data[i];
			_split_fields(fields, buf, child,
				double_quoted || wl->double_quoted, data);
			_split_fields(data, child);
		}
		break;
	default:


@@ 112,8 139,7 @@ void split_fields(struct mrsh_array *fields, struct mrsh_word *word,
	if (ifs == NULL) {
		ifs = " \t\n";
	} else if (ifs[0] == '\0') {
		char *str = mrsh_word_str(word);
		mrsh_array_add(fields, str);
		mrsh_array_add(fields, mrsh_word_copy(word));
		return;
	}



@@ 126,23 152,25 @@ void split_fields(struct mrsh_array *fields, struct mrsh_word *word,
		}
	}

	struct mrsh_buffer buf = {0};
	struct split_fields_data data = {
		.fields = fields,
		.ifs = ifs,
		.ifs_non_space = ifs_non_space,
		.in_ifs = true,
	};
	_split_fields(fields, &buf, word, false, &data);
	if (!data.in_ifs) {
		mrsh_buffer_append_char(&buf, '\0');
		char *str = mrsh_buffer_steal(&buf);
		mrsh_array_add(fields, str);
	}
	mrsh_buffer_finish(&buf);
	_split_fields(&data, word);

	free(ifs_non_space);
}

/**
 * Walks `fields_word` from first to last element, converts each word to a
 * string with `mrsh_word_str` and appends that string to `fields_str`.
 */
void get_fields_str(struct mrsh_array *fields_str,
		struct mrsh_array *fields_word) {
	size_t idx = 0;
	while (idx < fields_word->len) {
		struct mrsh_word *field = fields_word->data[idx];
		mrsh_array_add(fields_str, mrsh_word_str(field));
		++idx;
	}
}

bool expand_pathnames(struct mrsh_array *expanded, struct mrsh_array *fields) {
	const char metachars[] = "*?[";