@@ 8,11 8,9 @@
// https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
// Their implementations are loosely based on Go's.
//
-// Note that snconv_stof() is stricter than strtod() on the input, effectively
-// requiring it to match JSON's grammar:
-// https://datatracker.ietf.org/doc/html/rfc8259#section-6
+// Note that snconv_stof() is stricter than strtod() on the input.
// This means no infinities, NaNs, leading zeros, leading whitespace, empty
-// integer or fractional parts, or leading plus sign.
+// integer or fractional parts.
//
// For float→string, Dragonbox algorithm is used, as described here:
// https://github.com/jk-jeon/dragonbox/blob/master/other_files/Dragonbox.pdf
@@ 24,8 22,18 @@
//
// This file also has string→integer and integer→string conversion logic,
// which is relatively simple.
+//
+// "Why can't you just use strtod()/snprintf() with thread-local uselocale(3)?"
+// That's a hack. ¯\_(ツ)_/¯
+// For strtod(), I would need to ensure the correct format anyway.
+// snprintf() results in imprecise and/or ugly representations, and making it
+// behave properly sounds like yet another hack.
+//
+// Also, the following code assumes that `double` is an IEEE 754 compliant
+// 64-bit floating point number type. If it's not, good luck.
+//
+// tl;dr: C stdlib is bad, locales are stupid, so here we are
-#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
@@ 40,6 48,8 @@
#define MANT_MASK ((1UL << MANT_PRECISION) - 1)
#define SIGN_MASK (1UL << (EXP_PRECISION + MANT_PRECISION))
+#define INFINITY_MASK ((uint64_t)EXP_MASK << MANT_PRECISION)
+
// Arbitrary number that is big enough
#define HPD_MAXLEN 800
// 64 bit for the accumulator - 4 bits for 1 decimal digit
@@ 1161,13 1171,14 @@ static uint64_t hpd_get_rounded(struct hpd *hpd) {
return d;
}
-static double hpd_to_float(struct hpd *hpd) {
+static bool hpd_to_float(struct hpd *hpd, double *d) {
if (hpd->len == 0 || hpd->point < -326) {
// 0 or underflow
- return hpd->negative ? -0.0 : 0.0;
+ *d = hpd->negative ? -0.0 : 0.0;
+ return true;
} else if (hpd->point > 310) {
// Overflow
- return INFINITY;
+ return false;
}
int exp2 = BIAS;
@@ 1208,7 1219,7 @@ static double hpd_to_float(struct hpd *hpd) {
}
if (exp2 >= EXP_MASK) {
// Exponent is too big
- return INFINITY;
+ return false;
}
hpd_left_shift(hpd, MANT_PRECISION + 1);
@@ 1218,7 1229,7 @@ static double hpd_to_float(struct hpd *hpd) {
m >>= 1;
exp2 += 1;
if (exp2 >= EXP_MASK) {
- return INFINITY;
+ return false;
}
}
@@ 1234,7 1245,8 @@ static double hpd_to_float(struct hpd *hpd) {
if (hpd->negative) {
conv.u64 |= SIGN_MASK;
}
- return conv.d;
+ *d = conv.d;
+ return true;
}
// Fast decimal
@@ 1257,30 1269,33 @@ static void dec_trim(struct dec *dec) {
}
}
-static double dec_to_float_exact(struct dec *dec) {
+static bool dec_to_float_exact(struct dec *dec, double *d) {
if (dec->m > MANT_MASK) {
- return NAN;
+ return false;
}
- double d = dec->negative ? -(double)dec->m : (double)dec->m;
+ double v = dec->negative ? -(double)dec->m : (double)dec->m;
if (dec->exp >= 0) {
if (dec->exp <= F64_MAXPOW10) {
- return d * f64_pow10[dec->exp];
+ *d = v * f64_pow10[dec->exp];
+ return true;
}
} else {
if (dec->exp >= -F64_MAXPOW10) {
- return d / f64_pow10[-dec->exp];
+ *d = v / f64_pow10[-dec->exp];
+ return true;
}
}
- return NAN;
+ return false;
}
-static double dec_to_float_eisel_lemire(struct dec *dec) {
+static bool dec_to_float_eisel_lemire(struct dec *dec, double *d) {
if (dec->m == 0) {
- return dec->negative ? -0.0 : 0.0;
+ *d = dec->negative ? -0.0 : 0.0;
+ return true;
}
if (dec->exp < F128_POW10DATA_MIN ||
dec->exp > F128_POW10DATA_MAX) {
- return NAN;
+ return false;
}
struct u128 p10m = f128_pow10[dec->exp - F128_POW10DATA_MIN];
@@ 1306,7 1321,7 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {
}
if ((x.hi & 0x1ff) == 0x1ff && lo + 1 == 0 && y.lo + m < m) {
// Still ambiguous
- return NAN;
+ return false;
}
x.lo = lo;
}
@@ 1318,7 1333,7 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {
if (x.lo == 0 && (x.hi & 0x1ff) == 0 && (m & 3) == 1) {
// Ambiguous rounding ("500" case)
- return NAN;
+ return false;
}
// Shift m to a 53-bit number
@@ 1330,7 1345,7 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {
if (exp2 < 1 || exp2 >= EXP_MASK) {
// Exponent underflow/overflow
- return NAN;
+ return false;
}
// Construct the result
@@ 1339,7 1354,8 @@ static double dec_to_float_eisel_lemire(struct dec *dec) {
if (dec->negative) {
conv.u64 |= SIGN_MASK;
}
- return conv.d;
+ *d = conv.d;
+ return true;
}
static void dec_from_float_dragonbox_normal(struct dec *dec,
@@ 1516,11 1532,15 @@ static bool parse_float(const char *str, size_t len,
if (str[0] == '-') {
negative = true;
++ptr;
- if (ptr == len) {
- // A sign without integer part digits
- return false;
- }
+ } else if (str[0] == '+') {
+ ++ptr;
}
+
+ if (ptr == len) {
+ // A sign without integer part digits
+ return false;
+ }
+
hpd->negative = negative;
dec->negative = negative;
@@ 1664,10 1684,14 @@ enum snconv_status snconv_stoi(const char *str, size_t len, int64_t *i) {
if (str[0] == '-') {
negative = true;
++ptr;
- if (len == 1) {
- return SNCONV_BAD_FORMAT;
- }
+ } else if (str[0] == '+') {
+ ++ptr;
+ }
+
+ if (ptr == len) {
+ return SNCONV_BAD_FORMAT;
}
+
bool lz = false;
uint64_t u64 = 0;
for (; ptr < len; ptr++) {
@@ 1718,29 1742,30 @@ enum snconv_status snconv_stof(const char *str, size_t len, double *d) {
if (!dec.truncated) {
// Before doing anything smart, check if the number can be
// represented as f64 exactly; this is the fastest path.
- *d = dec_to_float_exact(&dec);
- if (!isnan(*d)) {
+ if (dec_to_float_exact(&dec, d)) {
return SNCONV_OK;
}
// Try Eisel-Lemire, which is expected to handle most of
// the cases; fast as well.
- *d = dec_to_float_eisel_lemire(&dec);
- if (!isnan(*d)) {
+ if (dec_to_float_eisel_lemire(&dec, d)) {
return SNCONV_OK;
}
}
// The slow path
- *d = hpd_to_float(&hpd);
- if (!isfinite(*d)) {
- return SNCONV_OVERFLOW;
+ if (hpd_to_float(&hpd, d)) {
+ return SNCONV_OK;
}
- return SNCONV_OK;
+ return SNCONV_OVERFLOW;
}
size_t snconv_ftos(double d, char buf[static SNCONV_FTOS_MAXSIZE]) {
- if (!isfinite(d)) {
+ union conv conv;
+ conv.d = d;
+ if ((conv.u64 & INFINITY_MASK) == INFINITY_MASK) {
+ // Infinity or NaN
return 0;
}
+
struct dec dec;
dec_from_float(&dec, d);
@@ 35,6 35,9 @@ int main(void) {
s = scalar("100");
assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_NONE && i == 100);
+ s = scalar("+123");
+ assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_NONE && i == 123);
+
s = scalar("-401");
assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_NONE && i == -401);
@@ 53,7 56,7 @@ int main(void) {
s = scalar("01");
assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_FORMAT_MISMATCH);
- s = scalar("+123");
+ s = scalar("+");
assert(nyth_scalar_as_int(&s, &i) == NYTH_ERROR_FORMAT_MISMATCH);
double f;
@@ 66,7 69,7 @@ int main(void) {
assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_NONE &&
f == -0.000000123);
- s = scalar("1e308");
+ s = scalar("+1e308");
assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_NONE &&
f == 1e308);
@@ 76,7 79,7 @@ int main(void) {
s = scalar("01");
assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_FORMAT_MISMATCH);
- s = scalar("+1");
+ s = scalar("-");
assert(nyth_scalar_as_float(&s, &f) == NYTH_ERROR_FORMAT_MISMATCH);
s = scalar("Infinity");