~vyivel/libnyth

f4f60233b056339ad9c9e5dca7a1be444b34bcd8 — Kirill Primak 5 months ago e11b855 v0.1.0
snconv: update, allow plus signs
2 files changed, 71 insertions(+), 43 deletions(-)

M src/snconv.c
M tests/test_scalar_as.c
M src/snconv.c => src/snconv.c +65 -40
@@ 8,11 8,9 @@
// https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
// Their implementations are loosely based on Go's.
//
// Note that snconv_stof() is stricter than strtod() on the input, effectively
// requiring it to match JSON's grammar:
// https://datatracker.ietf.org/doc/html/rfc8259#section-6
// Note that snconv_stof() is stricter than strtod() on the input.
// This means no infinities, NaNs, leading zeros, leading whitespace, empty
// integer or fractional parts, or leading plus sign.
// integer or fractional parts.
//
// For float→string, the Dragonbox algorithm is used, as described here:
// https://github.com/jk-jeon/dragonbox/blob/master/other_files/Dragonbox.pdf


@@ 24,8 22,18 @@
//
// This file also has string→integer and integer→string conversion logic,
// which is relatively simple.
//
// "Why can't you just use strtod()/snprintf() with thread-local uselocale(3)?"
// That's a hack. ¯\_(ツ)_/¯
// For strtod(), I would need to ensure the correct format anyway.
// snprintf() results in imprecise and/or ugly representations, and making it
// behave properly sounds like yet another hack.
//
// Also, the following code assumes that `double` is an IEEE 754 compliant
// 64-bit floating point number type. If it's not, good luck.
//
// tl;dr: C stdlib is bad, locales are stupid, so here we are

#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>


@@ 40,6 48,8 @@
#define MANT_MASK ((1UL << MANT_PRECISION) - 1)
#define SIGN_MASK (1UL << (EXP_PRECISION + MANT_PRECISION))

#define INFINITY_MASK ((uint64_t)EXP_MASK << MANT_PRECISION)

// Arbitrary number that is big enough
#define HPD_MAXLEN 800
// 64 bit for the accumulator - 4 bits for 1 decimal digit


@@ 1161,13 1171,14 @@ static uint64_t hpd_get_rounded(struct hpd *hpd) {
	return d;
}

static double hpd_to_float(struct hpd *hpd) {
static bool hpd_to_float(struct hpd *hpd, double *d) {
	if (hpd->len == 0 || hpd->point < -326) {
		// 0 or underflow
		return hpd->negative ? -0.0 : 0.0;
		*d = hpd->negative ? -0.0 : 0.0;
		return true;
	} else if (hpd->point > 310) {
		// Overflow
		return INFINITY;
		return false;
	}

	int exp2 = BIAS;


@@ 1208,7 1219,7 @@ static double hpd_to_float(struct hpd *hpd) {
	}
	if (exp2 >= EXP_MASK) {
		// Exponent is too big
		return INFINITY;
		return false;
	}

	hpd_left_shift(hpd, MANT_PRECISION + 1);


@@ 1218,7 1229,7 @@ static double hpd_to_float(struct hpd *hpd) {
		m >>= 1;
		exp2 += 1;
		if (exp2 >= EXP_MASK) {
			return INFINITY;
			return false;
		}
	}



@@ 1234,7 1245,8 @@ static double hpd_to_float(struct hpd *hpd) {
	if (hpd->negative) {
		conv.u64 |= SIGN_MASK;
	}
	return conv.d;
	*d = conv.d;
	return true;
}

// Fast decimal


@@ 1257,30 1269,33 @@ static void dec_trim(struct dec *dec) {
	}
}

static double dec_to_float_exact(struct dec *dec) {
static bool dec_to_float_exact(struct dec *dec, double *d) {
	if (dec->m > MANT_MASK) {
		return NAN;
		return false;
	}
	double d = dec->negative ? -(double)dec->m : (double)dec->m;
	*d = dec->negative ? -(double)dec->m : (double)dec->m;
	if (dec->exp >= 0) {
		if (dec->exp <= F64_MAXPOW10) {
			return d * f64_pow10[dec->exp];
			*d *= f64_pow10[dec->exp];
			return true;
		}
	} else {
		if (dec->exp >= -F64_MAXPOW10) {
			return d / f64_pow10[-dec->exp];
			*d /= f64_pow10[-dec->exp];
			return true;
		}
	}
	return NAN;
	return false;
}

static double dec_to_float_eisel_lemire(struct dec *dec) {
static double dec_to_float_eisel_lemire(struct dec *dec, double *d) {
	if (dec->m == 0) {
		return dec->negative ? -0.0 : 0.0;
		*d = dec->negative ? -0.0 : 0.0;
		return true;
	}
	if (dec->exp < F128_POW10DATA_MIN ||
			dec->exp > F128_POW10DATA_MAX) {
		return NAN;
		return false;
	}

	struct u128 p10m = f128_pow10[dec->exp - F128_POW10DATA_MIN];


@@ 1306,7 1321,7 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {
		}
		if ((x.hi & 0x1ff) == 0x1ff && lo + 1 == 0 && y.lo + m < m) {
			// Still ambiguous
			return NAN;
			return false;
		}
		x.lo = lo;
	}


@@ 1318,7 1333,7 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {

	if (x.lo == 0 && (x.hi & 0x1ff) == 0 && (m & 3) == 1) {
		// Ambiguous rounding ("500" case)
		return NAN;
		return false;
	}

	// Shift m to a 53-bit number


@@ 1330,7 1345,7 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {

	if (exp2 < 1 || exp2 >= EXP_MASK) {
		// Exponent underflow/overflow
		return NAN;
		return false;
	}

	// Construct the result


@@ 1339,7 1354,8 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {
	if (dec->negative) {
		conv.u64 |= SIGN_MASK;
	}
	return conv.d;
	*d = conv.d;
	return true;
}

static void dec_from_float_dragonbox_normal(struct dec *dec,


@@ 1516,11 1532,15 @@ static bool parse_float(const char *str, size_t len,
	if (str[0] == '-') {
		negative = true;
		++ptr;
		if (ptr == len) {
			// A sign without integer part digits
			return false;
		}
	} else if (str[0] == '+') {
		++ptr;
	}

	if (ptr == len) {
		// A sign without integer part digits
		return false;
	}

	hpd->negative = negative;
	dec->negative = negative;



@@ 1664,10 1684,14 @@ enum snconv_status snconv_stoi(const char *str, size_t len, int64_t *i) {
	if (str[0] == '-') {
		negative = true;
		++ptr;
		if (len == 1) {
			return SNCONV_BAD_FORMAT;
		}
	} else if (str[0] == '+') {
		++ptr;
	}

	if (ptr == len) {
		return SNCONV_BAD_FORMAT;
	}

	bool lz = false;
	uint64_t u64 = 0;
	for (; ptr < len; ptr++) {


@@ 1718,29 1742,30 @@ enum snconv_status snconv_stof(const char *str, size_t len, double *d) {
	if (!dec.truncated) {
		// Before doing anything smart, check if the number can be
		// represented as f64 exactly; this is the fastest path.
		*d = dec_to_float_exact(&dec);
		if (!isnan(*d)) {
		if (dec_to_float_exact(&dec, d)) {
			return SNCONV_OK;
		}
		// Try Eisel-Lemire, which is expected to handle most of
		// the cases; fast as well.
		*d = dec_to_float_eisel_lemire(&dec);
		if (!isnan(*d)) {
		if (dec_to_float_eisel_lemire(&dec, d)) {
			return SNCONV_OK;
		}
	}
	// The slow path
	*d = hpd_to_float(&hpd);
	if (!isfinite(*d)) {
		return SNCONV_OVERFLOW;
	if (hpd_to_float(&hpd, d)) {
		return SNCONV_OK;
	}
	return SNCONV_OK;
	return SNCONV_OVERFLOW;
}

size_t snconv_ftos(double d, char buf[static SNCONV_FTOS_MAXSIZE]) {
	if (!isfinite(d)) {
	union conv conv;
	conv.d = d;
	if ((conv.u64 & INFINITY_MASK) == INFINITY_MASK) {
		// Infinity or NaN
		return 0;
	}

	struct dec dec;
	dec_from_float(&dec, d);


M tests/test_scalar_as.c => tests/test_scalar_as.c +6 -3
@@ 35,6 35,9 @@ int main(void) {
	s = scalar("100");
	assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_NONE && i == 100);

	s = scalar("+123");
	assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_NONE && i == 123);

	s = scalar("-401");
	assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_NONE && i == -401);



@@ 53,7 56,7 @@ int main(void) {
	s = scalar("01");
	assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_FORMAT_MISMATCH);

	s = scalar("+123");
	s = scalar("+");
	assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_FORMAT_MISMATCH);

	double f;


@@ 66,7 69,7 @@ int main(void) {
	assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_NONE &&
		f == -0.000000123);

	s = scalar("1e308");
	s = scalar("+1e308");
	assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_NONE &&
		f == 1e308);



@@ 76,7 79,7 @@ int main(void) {
	s = scalar("01");
	assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_FORMAT_MISMATCH);

	s = scalar("+1");
	s = scalar("-");
	assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_FORMAT_MISMATCH);

	s = scalar("Infinity");