~sircmpwn/hare

hare/strings/iter.ha -rw-r--r-- 3.2 KiB
5c7cd775Alexey Yerin all: add 0 value to enums used as flags 2 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
use encoding::utf8;

// An iterator which yields each rune from a string.
//
// Create one with [[iter]] (starting at the beginning of the string) or
// [[riter]] (starting at the end), then step through it with [[next]] and
// [[prev]].
export type iterator = struct {
	// UTF-8 decoder state; it holds the source bytes and the current offset.
	dec:  utf8::decoder,
	// Rune handed back via [[push]] which the next call to [[next]] will
	// return, or void when no rune is pending.
	push: (rune | void),
};

// Creates a string iterator positioned at the beginning of the string. The new
// iterator has no pushed-back rune pending.
export fn iter(src: str) iterator = {
	return iterator {
		dec = utf8::decode(src),
		push = void,
	};
};

// Creates a string iterator positioned at the end of the string, so that
// [[prev]] walks the string backwards starting from its last rune. The new
// iterator has no pushed-back rune pending.
export fn riter(src: str) iterator = {
	let dec = utf8::decode(src);
	// Place the decoder offset just past the final byte of the string.
	dec.offs = len(src);
	return iterator {
		dec = dec,
		push = void,
	};
};

// Returns the next rune from an iterator, or void if there are none left. If a
// rune was handed back with [[push]], that rune is returned first and the
// underlying decoder is left untouched.
//
// Aborts if the underlying data is not valid UTF-8.
//
// Be aware that a rune is not the minimum lexicographical unit of language in
// Unicode strings. If you use these runes to construct a new string,
// reordering, editing, or omitting any of the runes without careful discretion
// may cause linguistic errors to arise. To avoid this, you may need to use
// [[unicode::graphiter]] instead.
export fn next(iter: *iterator) (rune | void) = {
	// A pushed-back rune takes priority and is consumed exactly once.
	if (iter.push is rune) {
		const pending = iter.push as rune;
		iter.push = void;
		return pending;
	};
	return match (utf8::next(&iter.dec)) {
	case r: rune =>
		yield r;
	case void =>
		yield void;
	case (utf8::more | utf8::invalid) =>
		abort("Invalid UTF-8 string (this should not happen)");
	};
};

// Returns the rune preceding the iterator's current position, or void when the
// iterator is already at the start of the string.
//
// Aborts if a rune handed back with [[push]] has not yet been consumed by
// [[next]], or if the underlying data is not valid UTF-8.
export fn prev(iter: *iterator) (rune | void) = {
	assert(iter.push is void);
	const decoded = utf8::prev(&iter.dec);
	return match (decoded) {
	case r: rune =>
		yield r;
	case (utf8::more | utf8::invalid) =>
		abort("Invalid UTF-8 string (this should not happen)");
	case void =>
		yield void;
	};
};

// Un-reads a rune: the next call to [[next]] on this iterator returns the
// provided rune instead of decoding from the string. Only one rune may be
// pending at a time, and the next call using this iterator *must* be [[next]];
// every other function in this file that takes an iterator aborts the program
// until the pushed rune has been consumed.
//
// The underlying string is not modified, so once the pushed rune is consumed,
// functions like [[prev]] or [[iter_str]] behave as if push were never called.
export fn push(iter: *iterator, r: rune) void = {
	// Refuse to overwrite a rune which is still waiting to be consumed.
	assert(!(iter.push is rune));
	iter.push = r;
};

// Returns the remainder of the string, from the iterator's current position
// (the rune that [[next]] would decode) through to the end of the string.
//
// Aborts if a rune handed back with [[push]] has not yet been consumed.
export fn iter_str(iter: *iterator) str = {
	assert(iter.push is void);
	const start = iter.dec.offs;
	const rest = iter.dec.src[start..];
	return fromutf8(rest);
};

@test fn iter() void = {
	let it = iter("こんにちは");
	// Nothing precedes the start of the string.
	assert(prev(&it) is void);

	// Walk forward over the first two runes.
	const head = ['こ', 'ん'];
	for (let n = 0z; n < len(head); n += 1) {
		const got = next(&it);
		if (got is void) {
			abort();
		};
		assert(got as rune == head[n]);
	};
	assert(iter_str(&it) == "にちは");

	// Step back one rune, then read forward through the end of the string.
	assert(prev(&it) as rune == 'ん');
	const tail = ['ん', 'に', 'ち', 'は'];
	for (let n = 0z; n < len(tail); n += 1) {
		const got = next(&it);
		if (got is void) {
			abort();
		};
		assert(got as rune == tail[n]);
	};

	// An exhausted iterator keeps returning void.
	assert(next(&it) is void);
	assert(next(&it) is void);

	// A pushed rune is returned once and leaves the position unchanged.
	push(&it, 'q');
	assert(next(&it) as rune == 'q');
	assert(prev(&it) as rune == 'は');

	// A reverse iterator starts at the end and walks backwards.
	it = riter("にちは");
	const reversed = ['は', 'ち', 'に'];
	for (let n = 0z; n < len(reversed); n += 1) {
		const got = prev(&it);
		if (got is void) {
			abort();
		};
		assert(got as rune == reversed[n]);
	};
	assert(prev(&it) is void);
	assert(next(&it) as rune == 'に');
};