Add C++ templates to Hare
This patch adds a long awaited feature to Hare: C++ templates.
The basics of them work roughly as you would expect:
template<T: typename> fn sort(items: []T) void = {
// very efficient sorting algorithm
for (let i = 0z; i < len(items) - 1; i += 1) {
let idx = i;
for (let j = i + 1; j < len(items); j += 1) {
if (cmp(items[j], items[idx]) < 0) {
idx = j;
};
};
const tmp = items[idx];
items[idx] = items[i];
items[i] = tmp;
};
};
template<T: typename> fn cmp(a: T, b: T) int = {
return if (a < b) -1
else if (a > b) 1
else 0;
};
// explicit specialization (special case for rune template argument)
template<> fn cmp<>(a: rune, b: rune) int = cmp(a: u32, b: u32);
// explicit specialization (special case for str template argument)
template<> fn cmp<>(a: str, b: str) int = {
const a = *(&a: *[]u8);
const b = *(&b: *[]u8);
let ln = if (len(a) < len(b)) (len(a), -1) else (len(b), 1);
for (let i = 0z; i < ln.0; i += 1) {
if (a[i] != b[i]) {
return a[i]: int - b[i]: int;
};
};
return if (len(a) == len(b)) 0 else ln.1;
};
@test fn sort() void = {
let x: []int = [73, 43, 49, 67, 54, 19, 9, 31, 3, 31];
sort<int>(x); // can pass template argument explicitly ...
for (let i = 1z; i < len(x); i += 1) {
assert(x[i] >= x[i - 1]);
};
let x: []rune = [
'嶴', '3', '㱙', '~', '퍛', '<', '햴', 'w', 'Ḕ', 'J',
'둵', 'W', '磌', '@', '驚', 'K', 'ꏤ', 'f', '仾', '=',
];
sort(x); // ... or can omit it and let it be deduced
for (let i = 1z; i < len(x); i += 1) {
assert(x[i]: u32 >= x[i - 1]: u32);
};
};
Unlike in C++, the order of declarations doesn't matter. Template
declarations and/or explicit specializations can even span multiple
subunits without issue.
template<T: typename> let x<5, *T> = "special case";
template<N: int, T: typename> let x: [N]T = [0...];
The parser is resilient:
template<int = 0> type shadowed = void;
template<> type shadowed<1> = int;
template<> type shadowed<2> = bool;
fn f(shadowed: int) shadowed<2> = {
return shadowed < 1 >> 0;
};
template<N: int> def X = N * 3;
static assert(X<1>>2);
Templates can also be used on constant declarations, which is very
powerful. For example, here's an implementation of compile-time
factorial and power "functions":
template<> def FACTORIAL<0> = 1u;
template<N: uint> def FACTORIAL = N * FACTORIAL<N - 1>;
static assert(FACTORIAL<6> == 720);
template<BASE: int> def POW<BASE, 0>: int = 1;
template<BASE: int, EXP: uint> def POW = BASE * POW<BASE, EXP - 1>;
static assert(POW<9, 3> == 729);
Ever wanted a generic "default value" operator in Hare? Now you can make
your own!
template<T: typename> def DEFAULT: T = _default<T>{ ... }.default;
template<T: typename> type _default = struct { default: T };
static assert(DEFAULT<int> == 0);
static assert(DEFAULT<str> == "");
static assert(DEFAULT<nullable *opaque> == null);
Templates can also be imported from other modules:
use sort;
@test fn sort() void = {
let x: []int = [73, 43, 49, 67, 54, 19, 9, 31, 3, 31];
sort::sort<int>(x);
assert(sort::sorted(x));
};
The Itanium C++ ABI is used for mangling the names of instantiated
templates. This means that the generated mangled names are compatible
with GCC and Clang, as well as some other C++ implementations. Because
of this, templates declared in Hare can be called from C++, and vice
versa:
example.ha:
export template<N: uint, T: typename> fn bar(
x: *T,
) nullable *opaque = {
static let arr: [N]T = [0...];
arr[..] = [*x...];
return &arr;
};
// mangled as _Z3barILj4EhEPvRT0_
export template fn bar<4>(x: *u8) nullable *opaque;
example.cxx:
#include <cassert>
template<unsigned int N, typename T> void *bar(T &x);
int main()
{
unsigned char c = 'c';
unsigned char *x = (unsigned char *)bar<4>(c);
for (int i = 0; i < 4; i++) {
assert(x[i] == c);
}
}
Here's another example with a much more complex template. this time
function definition is written in C++ and called from Hare:
example.cxx:
#include <iostream>
namespace f {
template<typename ...T, typename TT> void g(float *a,
TT &b, void *c, void *d, float *&(&e)(T *...),
void (&f)(T *&...), void (&g)(T *&...),
const TT *h, const TT &i)
{
std::cout << "hi lol\n";
}
template void g(float *, float *&, void *, void *,
float *&(&)(float *, signed char *, unsigned long long *),
void (&)(float *&, signed char *&, unsigned long long *&),
void (&)(float *&, signed char *&, unsigned long long *&),
float *const *, float *const &);
}
example.ha:
template<T: typename..., TT: typename> fn f::g(
a: nullable *f32,
b: *TT,
c: nullable *opaque,
d: nullable *opaque,
e: *fn(nullable *T...) *nullable *f32,
f: *fn(*nullable *T...) void,
g: *fn(*nullable *T...) void,
h: nullable *const TT,
i: *const TT,
) void;
export @symbol("main") fn main() void = {
// mangled as _ZN1f1gIJfayEPfEEvS1_RT0_PvS4_RFRS1_DpPT_ERFvDpRS7_ESE_PKS2_RSF_
f::g(&0f32, &(null: nullable *f32), &0, &0, &float_func,
&void_func, &void_func, null,
&(null: nullable *f32));
};
fn float_func(
a: nullable *f32,
b: nullable *i8,
c: nullable *u64,
) *nullable *f32 = null: *nullable *f32;
fn void_func(
a: *nullable *f32,
b: *nullable *i8,
c: *nullable *u64,
) void = void;
Variadic templates are implemented using parameter packs:
template<T: typename...> type tagged_with_ptrs = (*T... |);
type t = tagged_with_ptrs<int, str, f64>; // (*int | *str | *f64)
The syntax for expanding a parameter pack is suffixing it with ellipsis.
template<S: int...> fn f(n: int) bool = {
switch (n) {
case 1, S..., 3 =>
return true;
case =>
return false;
};
};
Unfortunately, this introduces a parsing ambiguity when used in a
function prototype or call expression:
type t = int;
fn f(x: t...) void = void;
template<T: typename...> fn g(x: T...) void = void;
template<S: int...> fn h() void = {
let s: []t = [1, 2, 3];
f(s...);
g(S...);
};
In the above example, f is a Hare-style variadic function, whereas g is
a non-variadic function whose parameters are expanded from the parameter
pack T. Likewise, the function call to f uses the slice s as variadic
arguments, whereas the call to g expands the parameter pack S and passes
in the arguments normally. The rule used to disambiguate these cases is
that if the type or expression being expanded contains an unexpanded
parameter pack, then it's treated as a pack expansion.
Various expressions have been introduced to perform computations on/with
parameter packs. The new `size...(X)` expression gives the length of a
parameter pack:
template<S: int...> def PACK_LEN = size...(S);
Folding expressions have also been introduced. These let you perform
binary arithmetic on all items in a parameter pack:
// unary folds
template<S: int...> def LSUB = (... - S);
template<S: int...> def RSUB = (S - ...);
Given the parameter list <1, 2, 3, 4>, LSUB expands to
(((1 - 2) - 3) - 4), and RSUB expands to (1 - (2 - (3 - 4))). Binary
folds are supported as well:
// binary folds
template<N: int, S: int...> def BLSUB = (N - ... - S);
template<N: int, S: int...> def BRSUB = (S - ... - N);
Given the same parameter list (where N=1, and S=<2, 3, 4>), BLSUB
expands to (((1 - 2) - 3) - 4), and BRSUB expands to
(2 - (3 - (4 - 1))). Any binary operator can be used for any form of
folding expression:
template<S: int...> def X = (... < S);
The constant X is only valid if S contains 1 or 2 items; any more items
and you end up comparing a bool to an int, causing translation to fail.
If a parameter pack in a unary fold expression expands to only one item,
the result is that item. If a parameter pack in a binary fold expression
expands to zero items, the result is the non-pack operand. If a
parameter pack in a unary fold expression expands to zero items, the
compiler will usually error out, with two exceptions, demonstrated
below:
template<S: bool...> def AND = (... && S);
template<S: bool...> def OR = (... || S);
static assert(AND && !OR);
These rules are all taken from the C++ standard.
To those concerned with backwards compatibility, don't worry: this
commit is almost entirely backwards compatible. Although 3 new keywords
are added (class, template, typename), they're only lexed as keywords
where permitted by the syntax, so they can still be used as names:
use strings::template;
template<> let template: template::template = [];
There's one place where this commit technically introduces a breaking
change. All mangled names begin with `_Z`, which is fine in C++ since
identifiers beginning with underscores are reserved, but Hare doesn't
have reserved identifiers. So instead, all names in the program which
begin with `_Z` are "adjusted" by suffixing them with `$`. This makes
them valid identifiers in the assembler, but not within Hare, so unless
the programmer uses @symbol, there will never be a name conflict. This
is only a breaking change for code which relies on the symbol emitted
for identifiers beginning with `_Z`.
I hope you all enjoy harec++! Happy april fools! :3