~sircmpwn/hare unlisted

cdcb4e915b5f2b0c02221b1c59a6439f368c58db — Vlad-Stefan Harbuz 2 months ago 834cae4
Add a variety of float functions to math/floats.ha

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
1 files changed, 437 insertions(+), 7 deletions(-)

M math/floats.ha
M math/floats.ha => math/floats.ha +437 -7
@@ 1,3 1,5 @@
use types;

// The floating point value representing Not a Number, i.e. an undefined or
// unrepresentable value. You cannot test if a number is NaN by comparing to
// this value; see [[isnan]] instead.


@@ 7,8 9,19 @@ export def NAN: f32 = 0.0 / 0.0;
// negative infinity.
export def INF: f32 = 1.0 / 0.0;

// Returns true if the given f64 is NaN.
export fn isnanf64(n: f64) bool = n != n;

// Returns true if the given f32 is NaN.
export fn isnanf32(n: f32) bool = n != n;

// Returns true if the given floating-point number is NaN.
export fn isnan(n: f64) bool = n != n;
export fn isnan(n: types::floating) bool = {
	return match (n) {
		f: f64 => isnanf64(f),
		f: f32 => isnanf32(f),
	};
};

@test fn isnan() void = {
	assert(isnan(NAN));


@@ 35,7 48,7 @@ export fn isinf(n: f64) bool = {
};

// Returns true if the given floating-point number is normal.
export fn isnormal(n: (f32 | f64)) bool = {
export fn isnormal(n: types::floating) bool = {
	return match (n) {
		n: f32 => isnormalf32(n),
		n: f64 => isnormalf64(n),


@@ 59,7 72,7 @@ export fn isnormalf64(n: f64) bool = {
};

// Returns true if the given floating-point number is subnormal.
export fn issubnormal(n: (f32 | f64)) bool = {
export fn issubnormal(n: types::floating) bool = {
	return match (n) {
		n: f32 => issubnormalf32(n),
		n: f64 => issubnormalf64(n),


@@ 171,11 184,17 @@ export def F32_EXPONENT_BITS: u64 = 8;
// from the exponent in the binary representation to get the actual exponent.
export def F32_EXPONENT_BIAS: u16 = 127;

def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;
def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;
// Mask with each bit of an f64's mantissa set.
export def F64_MANTISSA_MASK: u64 = (1 << F64_MANTISSA_BITS) - 1;

def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1;
def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1;
// Mask with each bit of an f64's exponent set.
export def F64_EXPONENT_MASK: u64 = (1 << F64_EXPONENT_BITS) - 1;

// Mask with each bit of an f32's mantissa set.
export def F32_MANTISSA_MASK: u64 = (1 << F32_MANTISSA_BITS) - 1;

// Mask with each bit of an f32's exponent set.
export def F32_EXPONENT_MASK: u64 = (1 << F32_EXPONENT_BITS) - 1;

// The largest representable f64 value which is less than Infinity.
export def F64_MAX_NORMAL: f64 = 1.7976931348623157e+308;


@@ 195,6 214,29 @@ export def F32_MIN_NORMAL: f32 = 1.1754944e-38;
// The smallest (subnormal) f32 value greater than zero.
export def F32_MIN: f32 = 1.0e-45;

// The mask that gets an f64's sign.
def F64_SIGN_MASK: u64 = 1u64 << 63;

// The mask that sets all exponent bits to 0.
// NOTE: Replace with the following expression once the lexer supports it
// 0u64 & ~(F64_EXPONENT_MASK << F64_MANTISSA_BITS);
def F64_EXP_REMOVAL_MASK: u64 =
	0b1000000000001111111111111111111111111111111111111111111111111111;

// The f64 that contains only an exponent that evaluates to zero.
def F64_EXP_ZERO: u64 = ((F64_EXPONENT_BIAS: u64) - 1) << F64_MANTISSA_BITS;

// The mask that gets an f32's sign.
def F32_SIGN_MASK: u32 = 1u32 << 31;

// The mask that sets all exponent bits to 0.
// NOTE: Replace with the following expression once the lexer supports it
// 0u32 & ~(F32_EXPONENT_MASK << F32_MANTISSA_BITS);
def F32_EXP_REMOVAL_MASK: u32 = 0b10000000011111111111111111111111;

// The f32 that contains only an exponent that evaluates to zero.
def F32_EXP_ZERO: u32 = ((F32_EXPONENT_BIAS: u32) - 1) << (F32_MANTISSA_BITS: u32);

// Contains information about the structure of a specific floating point number
// type.
export type floatinfo = struct {


@@ 228,3 270,391 @@ export const f32info: floatinfo = floatinfo {
	expmask = (1 << 8) - 1,
};

// Returns the absolute value of n.
export fn absf64(n: f64) f64 = {
	return f64frombits(f64bits(n) & ~F64_SIGN_MASK);
};

// Returns the absolute value of n.
export fn absf32(n: f32) f32 = {
	return f32frombits(f32bits(n) & ~F32_SIGN_MASK);
};

// Returns the absolute value of n.
export fn abs(n: types::floating) f64 = {
	return match (n) {
		n: f64 => absf64(n),
		n: f32 => (absf32(n): f64),
	};
};

@test fn test_abs() void = {
	assert(absf64(2.0f64) == 2.0f64);
	assert(absf32(2.0f32) == 2.0f32);
	assert(abs(2.0f64) == 2.0f64);
	assert(abs(2.0f32) == 2.0f64);
	assert(abs(-2.0f64) == 2.0f64);
	assert(abs(-2.0f32) == 2.0f32);
	assert(abs(0f32) == 0f32);
	assert(abs(0f64) == 0f64);
};

// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed.
export fn signf64(x: f64) i64 = {
	if (f64bits(x) & F64_SIGN_MASK == 0) {
		return 1i64;
	} else {
		return -1i64;
	};
};

// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed.
export fn signf32(x: f32) i64 = {
	if (f32bits(x) & F32_SIGN_MASK == 0) {
		return 1i64;
	} else {
		return -1i64;
	};
};

// Returns 1 if x is positive and -1 if x is negative. Note that zero is also signed.
export fn sign(x: types::floating) i64 = {
	return match (x) {
		n: f64 => signf64(n),
		n: f32 => signf32(n),
	};
};

@test fn test_sign() void = {
	assert(sign(0.0f64) == 1i64);
	assert(sign(-0.0f64) == -1i64);
	assert(sign(0.0f32) == 1i64);
	assert(sign(-0.0f32) == -1i64);
	assert(sign(1.5f64) == 1i64);
	assert(sign(-1.5f64) == -1i64);
};

// Returns x, but with the sign of y.
export fn copysignf64(x: f64, y: f64) f64 = {
	return f64frombits((f64bits(x) & ~F64_SIGN_MASK) |
		(f64bits(y) & F64_SIGN_MASK));
};

// Returns x, but with the sign of y.
export fn copysignf32(x: f32, y: f32) f32 = {
	return f32frombits((f32bits(x) & ~F32_SIGN_MASK) |
		(f32bits(y) & F32_SIGN_MASK));
};

// Returns x, but with the sign of y.
export fn copysign(x: types::floating, y: types::floating) f64 = {
	return match (x) {
		n: f64 => copysignf64(n, (y: f64)),
		n: f32 => (copysignf32(n, (y: f32)): f64),
	};
};

@test fn test_copysign() void = {
	assert(copysign(100.0f64, 1.0f64) == 100.0f64);
	assert(copysign(100.0f64, -1.0f64) == -100.0f64);
	assert(copysign(100.0f32, 1.0f32) == 100.0f32);
	assert(copysign(100.0f32, -1.0f32) == -100.0f32);
	assert(copysign(100.0f64, 0.0f64) == 100.0f64);
	assert(copysign(100.0f64, -0.0f64) == -100.0f64);
	assert(copysign(0.0f64, 100.0f64) == 0.0f64);
	assert(sign(copysign(0.0f64, 100.0f64)) > 0);
	assert(copysign(0.0f64, -100.0f64) == 0.0f64);
	assert(sign(copysign(0.0f64, -100.0f64)) < 0);
};

// Takes a potentially subnormal f64 n and returns a normal f64 normal_float
// and an exponent exp such that n == normal_float * 2^{exp}.
export fn normalizef64(n: f64) (f64, i64) = {
	if (issubnormalf64(n)) {
		const factor = 1i64 << (F64_MANTISSA_BITS: i64);
		const normal_float = (n * (factor: f64));
		return (normal_float, -(F64_MANTISSA_BITS: i64));
	};
	return (n, 0);
};

// Takes a potentially subnormal f32 n and returns a normal f32 normal_float
// and an exponent exp such that n == normal_float * 2^{exp}.
export fn normalizef32(n: f32) (f64, i64) = {
	if (issubnormalf32(n)) {
		const factor = 1i32 << (F32_MANTISSA_BITS: i32);
		const normal_float = ((n * (factor: f64)): f64);
		return (normal_float, -(F32_MANTISSA_BITS: i64));
	};
	return (n, 0);
};

@test fn test_normalize() void = {
	let res = normalizef64(5.0e-320);
	assert(res.0 > F64_MIN_NORMAL);
	assert(res.1 < 0i64);
	res = normalizef64(5.0e-300);
	assert(res.0 == 5.0e-300);
	assert(res.1 == 0i64);
};

// Breaks a f64 down into its mantissa and exponent. The mantissa will be between 0.5 and
// 1.
export fn frexpf64(n: f64) (f64, i64) = {
	if (isnan(n) || isinf(n) || n == 0f64) {
		return (n, 0);
	};
	const normalized = normalizef64(n);
	const normal_float = normalized.0;
	const normalization_exp = normalized.1;
	const bits = f64bits(normal_float);
	const raw_exp: u64 = (bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK;
	const exp: i64 = normalization_exp +
		(raw_exp: i64) - (F64_EXPONENT_BIAS: i64) + 1;
	const mantissa: f64 = f64frombits((bits & F64_EXP_REMOVAL_MASK) | F64_EXP_ZERO);
	return (mantissa, exp);
};

// Breaks a f32 down into its mantissa and exponent. The mantissa will be between 0.5 and
// 1.
export fn frexpf32(n: f32) (f64, i64) = {
	if (isnan(n) || isinf(n) || n == 0f32) {
		return (n, 0);
	};
	const normalized = normalizef32(n);
	const normal_float = normalized.0;
	const normalization_exp = normalized.1;
	const bits = f32bits(normal_float);
	const raw_exp: u64 = ((bits >> (F32_MANTISSA_BITS: u32)) &
		(F32_EXPONENT_MASK: u32));
	const exp: i64 = normalization_exp +
		(raw_exp: i64) - (F32_EXPONENT_BIAS: i64) + 1;
	const mantissa: f32 = f32frombits((bits & F32_EXP_REMOVAL_MASK) | F32_EXP_ZERO);
	return (mantissa, exp);
};

// Breaks a float down into its mantissa and exponent. The mantissa will be between 0.5
// and 1.
export fn frexp(n: types::floating) (f64, i64) = {
	return match (n) {
		n: f64 => frexpf64(n),
		n: f32 => frexpf32(n),
	};
};

@test fn test_frexp() void = {
	let res = frexp(3.0f64);
	assert(res.0 == 0.75f64);
	assert(res.1 == 2i64);
	res = frexp(2.42f64);
	assert(res.0 == 0.605f64);
	assert(res.1 == 2i64);
	res = frexp(NAN);
	assert(res.1 == 0);
	res = frexp(INF);
	assert(res.1 == 0);
};

// Creates an f64 from a mantissa and an exponent.
export fn ldexpf64(mantissa: f64, exp: i64) f64 = {
	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f64) {
		return mantissa;
	};
	const normalized = normalizef64(mantissa);
	const normal_float = normalized.0;
	const normalization_exp = normalized.1;
	const bits = f64bits(normal_float);
	const mantissa_exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
		(F64_EXPONENT_BIAS: i64);
	let res_exp = exp + normalization_exp + mantissa_exp;
	// Underflow
	if (res_exp < -(F64_EXPONENT_BIAS: i64) - (F64_MANTISSA_BITS: i64)) {
		return copysign(0.0f64, mantissa);
	};
	// Overflow
	if (res_exp > (F64_EXPONENT_BIAS: i64)) {
		if (mantissa < 0.0f64) {
			return -INF;
		} else {
			return INF;
		};
	};
	// Subnormal
	let subnormal_factor = 1.0f64;
	if (res_exp < -(F64_EXPONENT_BIAS: i64) + 1) {
		res_exp += (F64_MANTISSA_BITS: i64) - 1;
		subnormal_factor = 1.0f64 /
			((1i64 << ((F64_MANTISSA_BITS: i64) - 1)): f64);
	};
	const res: u64 = (bits & F64_EXP_REMOVAL_MASK) |
		(
			((res_exp: u64) + F64_EXPONENT_BIAS)
			<< F64_MANTISSA_BITS
		);
	return subnormal_factor * f64frombits(res);
};

// Creates an f32 from a mantissa and an exponent.
export fn ldexpf32(mantissa: f32, exp: i64) f32 = {
	if (isnan(mantissa) || isinf(mantissa) || mantissa == 0f32) {
		return mantissa;
	};
	const normalized = normalizef32(mantissa);
	const normal_float = normalized.0;
	const normalization_exp = normalized.1;
	const bits = f32bits(normal_float);
	const mantissa_exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
		(F32_EXPONENT_BIAS: i32);
	let res_exp = exp + normalization_exp + mantissa_exp;
	// Underflow
	if (res_exp < -(F32_EXPONENT_BIAS: i32) - (F32_MANTISSA_BITS: i32)) {
		return copysign(0.0f32, mantissa);
	};
	// Overflow
	if (res_exp > (F32_EXPONENT_BIAS: i32)) {
		if (mantissa < 0.0f32) {
			return -INF;
		} else {
			return INF;
		};
	};
	// Subnormal
	let subnormal_factor = 1.0f32;
	if (res_exp < -(F32_EXPONENT_BIAS: i32) + 1) {
		res_exp += (F32_MANTISSA_BITS: i32) - 1;
		subnormal_factor = 1.0f32 /
			((1i32 << ((F32_MANTISSA_BITS: i32) - 1)): f32);
	};
	const res: u32 = (bits & F32_EXP_REMOVAL_MASK) |
		(
			((res_exp: u32) + F32_EXPONENT_BIAS)
			<< (F32_MANTISSA_BITS: u32)
		);
	return subnormal_factor * f32frombits(res);
};

@test fn test_frexp_ldexp() void = {
	const tests64: [_]f64 = [INF, -INF,
		0.0, 1.0, -1.0, 2.42, 123456789.0,
		F64_MIN_NORMAL, F64_MAX_NORMAL,
		3.0e-310f64];
	for (let i = 0z; i < len(tests64); i += 1) {
		const parts = frexpf64(tests64[i]);
		const res64 = ldexpf64(parts.0, parts.1);
		assert(res64 == tests64[i]);
	};
	assert(ldexpf64(1.0f64, -1076i64) == 0.0f64);
	assert(ldexpf64(-1.0f64, -1076i64) == -0.0f64);
	assert(sign(ldexpf64(-1.0f64, -1076i64)) < 0);
	assert(ldexpf64(2.0f64, 1024i64) == INF);
	assert(ldexpf64(-2.0f64, 1024i64) == -INF);

	const tests32: [_]f32 = [INF, -INF,
		0.0, 1.0, -1.0, 2.42, 123456789.0,
		F32_MIN_NORMAL, F32_MAX_NORMAL,
		3.0e-39f32];
	for (let i = 0z; i < len(tests32); i += 1) {
		const parts = frexpf32(tests32[i]);
		const res = ldexpf32(parts.0, parts.1);
		assert(res == tests32[i]);
	};
	assert(ldexpf32(1.0f32, -1076i32) == 0.0f32);
	assert(ldexpf32(-1.0f32, -1076i32) == -0.0f32);
	assert(sign(ldexpf32(-1.0f32, -1076i32)) < 0);
	assert(ldexpf32(2.0f32, 1024i32) == INF);
	assert(ldexpf32(-2.0f32, 1024i32) == -INF);
};

// Returns the integer and fractional parts of an f64.
export fn modf64(n: f64) (f64, f64) = {
	if (n < 1.0f64) {
		if (n < 0.0f64) {
			let positive_parts = modf64(-n);
			return (-positive_parts.0, -positive_parts.1);
		};
		if (n == 0.0f64) {
			return (n, n);
		};
		return (0.0f64, n);
	};
	let bits = f64bits(n);
	const exp = (((bits >> F64_MANTISSA_BITS) & F64_EXPONENT_MASK): i64) -
		(F64_EXPONENT_BIAS: i64);
	// For exponent exp, all integers can be represented with the top exp
	// bits of the mantissa
	const sign_and_exp_bits = 64u64 - (F64_EXPONENT_BITS: u64) - 1u64;
	if (exp < (sign_and_exp_bits: i64)) {
		const bits_to_shift = (((sign_and_exp_bits: i64) - exp): u64);
		bits = bits & ~((1u64 << bits_to_shift) - 1);
	};
	const int_part = f64frombits(bits);
	const frac_part = n - int_part;
	return (int_part, frac_part);
};

// Returns the integer and fractional parts of an f32.
export fn modf32(n: f32) (f32, f32) = {
	if (n < 1.0f32) {
		if (n < 0.0f32) {
			let positive_parts = modf32(-n);
			return (-positive_parts.0, -positive_parts.1);
		};
		if (n == 0.0f32) {
			return (n, n);
		};
		return (0.0f32, n);
	};
	let bits = f32bits(n);
	const exp = (((bits >> F32_MANTISSA_BITS) & F32_EXPONENT_MASK): i32) -
		(F32_EXPONENT_BIAS: i32);
	// For exponent exp, all integers can be represented with the top exp
	// bits of the mantissa
	const sign_and_exp_bits = 32u32 - (F32_EXPONENT_BITS: u32) - 1u32;
	if (exp < (sign_and_exp_bits: i32)) {
		const bits_to_shift = (((sign_and_exp_bits: i32) - exp): u32);
		bits = bits & ~((1u32 << bits_to_shift) - 1);
	};
	const int_part = f32frombits(bits);
	const frac_part = n - int_part;
	return (int_part, frac_part);
};

@test fn test_modf() void = {
	// 64
	let res = modf64(1.75f64);
	assert(res.0 == 1.0f64);
	assert(res.1 == 0.75f64);
	res = modf64(0.75f64);
	assert(res.0 == 0.0f64);
	assert(res.1 == 0.75f64);
	res = modf64(-0.75f64);
	assert(res.0 == -0.0f64);
	assert(res.1 == -0.75f64);
	res = modf64(0.0f64);
	assert(res.0 == 0.0f64);
	assert(res.1 == 0.0f64);
	assert(sign(res.1) > 0);
	res = modf64(-0.0f64);
	assert(res.0 == -0.0f64);
	assert(res.1 == -0.0f64);
	assert(sign(res.1) < 0);

	// 32
	let res = modf32(1.75f32);
	assert(res.0 == 1.0f32);
	assert(res.1 == 0.75f32);
	res = modf32(0.75f32);
	assert(res.0 == 0.0f32);
	assert(res.1 == 0.75f32);
	res = modf32(-0.75f32);
	assert(res.0 == -0.0f32);
	assert(res.1 == -0.75f32);
	res = modf32(0.0f32);
	assert(res.0 == 0.0f32);
	assert(res.1 == 0.0f32);
	assert(sign(res.1) > 0);
	res = modf32(-0.0f32);
	assert(res.0 == -0.0f32);
	assert(res.0 == -0.0f32);
	assert(sign(res.1) < 0);
};