~apreiml/hare

8afe9b2c021a145da8e6b30352821a3d7a99a51c — Armin Preiml 1 year, 5 months ago a568f1f
bigint: initial port of moddiv

Signed-off-by: Armin Preiml <apreiml@strohwolke.at>
2 files changed, 449 insertions(+), 0 deletions(-)

A crypto/bigint/+test/moddiv_test.ha
A crypto/bigint/moddiv.ha
A crypto/bigint/+test/moddiv_test.ha => crypto/bigint/+test/moddiv_test.ha +17 -0
@@ 0,0 1,17 @@
use fmt;

@test fn moddiv() void = {
	let m = fromhex("38a39b403101");
	const m0i = ninv31(m[1]);

	let x = fromhexmod("093e9898b330", m);
	let y = fromhexmod("00000000f431", m);
	let tmp: [6]word = [0...];

	const result = moddiv(x, y, m, m0i, tmp);
	assert(equalshex(x, "09b10a30"));

	free(x);
	free(y);
	free(m);
};

A crypto/bigint/moddiv.ha => crypto/bigint/moddiv.ha +432 -0
@@ 0,0 1,432 @@
// The following code was initially ported from BearSSL.
//
// Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
use crypto::math::*;


// In this file, we handle big integers with a custom format, i.e.
// without the usual one-word header. Value is split into 31-bit words,
// each stored in a 32-bit slot (top bit is zero) in little-endian
// order. The length (in words) is provided explicitly. In some cases,
// the value can be negative (using two's complement representation). In
// some cases, the top word is allowed to have a 32th bit.

// Compute 'x'/'y' mod 'm', result in 'x'. Values 'x' and 'y' must be between 0
// and 'm'-1, and have the same announced bit length as 'm'. Modulus 'm' must be
// odd. The 'm0i' parameter is equal to -1/m mod 2^31 (see [[ninv]]). The array
// 't' must point to a temporary area that can hold at least three integers of
// the size of 'm' (without length field).
//
// 'm' may not overlap 'x' and 'y'. 'x' and 'y' may overlap each other (this can
// be useful to test whether a value is invertible modulo 'm'). 't' must be
// disjoint from all other arrays.
//
// Returned value is 1 on success, 0 otherwise. Success is attained if 'y' is
// invertible modulo 'm'.
export fn moddiv(
	x: []word,
	y: const []word,
	m: const []word,
	m0i: u32,
	t: []word,
) u32 = {
	assert(len(t) >= 3 * (len(m) - 1),
		"t must hold at least three integers of the size of m");

	// Algorithm is an extended binary GCD. We maintain four values
	// a, b, u and v, with the following invariants:
	//
	//   a * x = y * u mod m
	//   b * x = y * v mod m
	//
	// Starting values are:
	//
	//   a = y
	//   b = m
	//   u = x
	//   v = 0
	//
	// The formal definition of the algorithm is a sequence of steps:
	//
	//   - If a is even, then a <- a/2 and u <- u/2 mod m.
	//   - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m.
	//   - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m.
	//   - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m.
	//
	// Algorithm stops when a = b. At that point, they both are equal
	// to GCD(y,m); the modular division succeeds if that value is 1.
	// The result of the modular division is then u (or v: both are
	// equal at that point).
	//
	// Each step makes either a or b shrink by at least one bit; hence,
	// if m has bit length k bits, then 2k-2 steps are sufficient.
	//
	//
	// Though complexity is quadratic in the size of m, the bit-by-bit
	// processing is not very efficient. We can speed up processing by
	// remarking that the decisions are taken based only on observation
	// of the top and low bits of a and b.
	//
	// In the loop below, at each iteration, we use the two top words
	// of a and b, and the low words of a and b, to compute reduction
	// parameters pa, pb, qa and qb such that the new values for a
	// and b are:
	//
	//   a' = (a*pa + b*pb) / (2^31)
	//   b' = (a*qa + b*qb) / (2^31)
	//
	// the division being exact.
	//
	// Since the choices are based on the top words, they may be slightly
	// off, requiring an optional correction: if a' < 0, then we replace
	// pa with -pa, and pb with -pb. The total length of a and b is
	// thus reduced by at least 30 bits at each iteration.
	//
	// The stopping conditions are still the same, though: when a
	// and b become equal, they must be both odd (since m is odd,
	// the GCD cannot be even), therefore the next operation is a
	// subtraction, and one of the values becomes 0. At that point,
	// nothing else happens, i.e. one value is stuck at 0, and the
	// other one is the GCD.

	const mlen: size = (m[0] + 31) >> 5;
	let a = t[..mlen];
	let b = t[mlen..(mlen * 2)];
	let u = x[1..];
	let v = t[mlen * 2.. mlen * 3];
	//memcpy(a, y + 1, mlen * sizeof *y);
	a[..mlen] = y[1..mlen + 1];
	// memcpy(b, m + 1, mlen * sizeof *m);
	b[..mlen] = m[1..mlen + 1];
	// memset(v, 0, mlen * sizeof *v);
	//
	for (let i = 0z; i < len(v); i += 1) {
		v[i] = 0;
	};

	// Loop below ensures that a and b are reduced by some bits each,
	// for a total of at least 30 bits.
	for (let num: u32 = ((m[0] - (m[0] >> 5)) << 1) + 30; num >= 30; num -= 30) {
		// Extract top words of a and b. If j is the highest
		// index >= 1 such that a[j] != 0 or b[j] != 0, then we want
		// (a[j] << 31) + a[j - 1], and (b[j] << 31) + b[j - 1].
		// If a and b are down to one word each, then we use a[0]
		// and b[0].
		let c0: u32 = -1: u32;
		let c1 = -1: u32;
		let a0: u32 = 0;
		let a1: u32 = 0;
		let b0: u32 = 0;
		let b1: u32 = 0;
		let j = mlen - 1;
		for (j < mlen; j -= 1) {
			const aw = a[j];
			const bw = b[j];
			a0 ^= (a0 ^ aw) & c0;
			a1 ^= (a1 ^ aw) & c1;
			b0 ^= (b0 ^ bw) & c0;
			b1 ^= (b1 ^ bw) & c1;
			c1 = c0;
			c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - 1: u32;
		};

		// If c1 = 0, then we grabbed two words for a and b.
		// If c1 != 0 but c0 = 0, then we grabbed one word. It
		// is not possible that c1 != 0 and c0 != 0, because that
		// would mean that both integers are zero.
		a1 |= a0 & c1;
		a0 &= ~c1;
		b1 |= b0 & c1;
		b0 &= ~c1;
		let a_hi: u64 = (a0: u64 << 31) + a1;
		let b_hi: u64 = (b0: u64 << 31) + b1;
		let a_lo = a[0];
		let b_lo = b[0];


		// Compute reduction factors:
		//
		//   a' = a*pa + b*pb
		//   b' = a*qa + b*qb
		//
		// such that a' and b' are both multiple of 2^31, but are
		// only marginally larger than a and b.
		let pa: i64 = 1;
		let pb: i64 = 0;
		let qa: i64 = 0;
		let qb: i64 = 1;
		for (let i = 0i; i < 31; i += 1) {
			// At each iteration:
			//
			//   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
			//   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
			//   a <- a/2 if: a is even
			//   b <- b/2 if: a is odd, b is even
			//
			// We multiply a_lo and b_lo by 2 at each
			// iteration, thus a division by 2 really is a
			// non-multiplication by 2.

			// r = GT(a_hi, b_hi)
			// But the GT() function works on uint32_t operands,
			// so we inline a 64-bit version here.
			let rz: u64 = b_hi - a_hi;
			const r: u32 = ((rz ^ ((a_hi ^ b_hi)
				& (a_hi ^ rz))) >> 63): u32;

			// cAB = 1 if b must be subtracted from a
			// cBA = 1 if a must be subtracted from b
			// cA = 1 if a is divided by 2, 0 otherwise
			//
			// Rules:
			//
			//   cAB and cBA cannot be both 1.
			//   if a is not divided by 2, b is.
			let oa: word = (a_lo >> i: word): word & 1;
			let ob: word = (b_lo >> i: word): word & 1;
			let cAB: word = oa & ob & r;
			let cBA: word = oa & ob & notu32(r);
			let cA: word = cAB | notu32(oa);

			// Conditional subtractions.
			a_lo -= b_lo & -cAB;
			a_hi -= b_hi & -cAB: u64;
			pa -= qa & -(cAB: i64);
			pb -= qb & -(cAB: i64);
			b_lo -= a_lo & -cBA;
			b_hi -= a_hi & -(cBA: u64);
			qa -= pa & -(cBA: i64);
			qb -= pb & -(cBA: i64);

			// Shifting.
			a_lo += a_lo & (cA - 1);
			pa += pa & (cA: i64 - 1);
			pb += pb & (cA: i64 - 1);
			a_hi ^= (a_hi ^ (a_hi >> 1)) & -cA: u64;
			b_lo += b_lo & -cA;
			qa += qa & -(cA: i64);
			qb += qb & -(cA: i64);
			b_hi ^= (b_hi ^ (b_hi >> 1)) & (cA: u64 - 1);
		};

		// Replace a and b with new values a' and b'.
		const r: u32 = co_reduce(a, b, mlen, pa, pb, qa, qb);
		pa -= pa * ((r & 1) << 1): i64;
		pb -= pb * ((r & 1) << 1): i64;
		qa -= qa * (r & 2): i64;
		qb -= qb * (r & 2): i64;

		co_reduce_mod(u, v, mlen, pa, pb, qa, qb, m[1..], m0i);
	};

	// Now one of the arrays should be 0, and the other contains
	// the GCD. If a is 0, then u is 0 as well, and v contains
	// the division result.
	// Result is correct if and only if GCD is 1.
	let r = (a[0] | b[0]) ^ 1;
	u[0] |= v[0];
	for (let k = 1z; k < mlen; k += 1) {
		r |= a[k] | b[k];
		u[k] |= v[k];
	};
	return eq0u32(r);
};

// Compute:
//   a <- (a*pa+b*pb)/(2^31)
//   b <- (a*qa+b*qb)/(2^31)
// The division is assumed to be exact (i.e. the low word is dropped).
// If the final a is negative, then it is negated. Similarly for b.
// Returned value is the combination of two bits:
//   bit 0: 1 if a had to be negated, 0 otherwise
//   bit 1: 1 if b had to be negated, 0 otherwise
//
// Factors pa, pb, qa and qb must be at most 2^31 in absolute value.
// Source integers a and b must be nonnegative; top word is not allowed
// to contain an extra 32th bit.
fn co_reduce(
	a: []word,
	b: []word,
	alen: size, // TODO
	pa: i64,
	pb: i64,
	qa: i64,
	qb: i64,
) u32 = {
	let cca: i64 = 0;
	let ccb: i64 = 0;
	for (let k = 0z; k < alen; k += 1) {
		// Since:
		//   |pa| <= 2^31
		//   |pb| <= 2^31
		//   0 <= wa <= 2^31 - 1
		//   0 <= wb <= 2^31 - 1
		//   |cca| <= 2^32 - 1
		// Then:
		//   |za| <= (2^31-1)*(2^32) + (2^32-1) = 2^63 - 1
		//
		// Thus, the new value of cca is such that |cca| <= 2^32 - 1.
		// The same applies to ccb.
		const wa = a[k];
		const wb = b[k];
		const za: u64 = wa * pa: u64 + wb * pb: u64 + cca: u64;
		const zb: u64 = wa * qa: u64 + wb * qb: u64 + ccb: u64;
		if (k > 0) {
			a[k - 1] = za: word & 0x7FFFFFFF;
			b[k - 1] = zb: word & 0x7FFFFFFF;
		};

		// For the new values of cca and ccb, we need a signed
		// right-shift; since, in C, right-shifting a signed
		// negative value is implementation-defined, we use a
		// custom portable sign extension expression.
		const m: u64 = 1 << 32;
		let tta: u64 = za >> 31;
		let ttb: u64 = zb >> 31;
		tta = (tta ^ m) - m;
		ttb = (ttb ^ m) - m;
		// cca = *(int64_t *)&tta;
		// ccb = *(int64_t *)&ttb;
		cca = tta: i64; // TODO test this
		ccb = ttb: i64;
	};
	a[alen - 1] = cca: u32;
	b[alen - 1] = ccb: u32;

	const nega: u32 = (cca: u64 >> 63): u32;
	const negb: u32 = (ccb: u64 >> 63): u32;
	cond_negate(a, alen, nega);
	cond_negate(b, alen, negb);
	return nega | (negb << 1);
};

// Compute:
//   a <- (a*pa+b*pb)/(2^31) mod m
//   b <- (a*qa+b*qb)/(2^31) mod m
//
// m0i is equal to -1/m[0] mod 2^31.
//
// Factors pa, pb, qa and qb must be at most 2^31 in absolute value.
// Source integers a and b must be nonnegative; top word is not allowed
// to contain an extra 32th bit.
fn co_reduce_mod(
	a: []word,
	b: []word,
	alen: size, // TODO
	pa: i64,
	pb: i64,
	qa: i64,
	qb: i64,
	m: const []word,
	m0i: u32,
) void = {
	let cca: i64 = 0;
	let ccb: i64 = 0;
	let fa: u32 = ((a[0] * pa: u32 + b[0] * pb: u32) * m0i) & 0x7FFFFFFF;
	let fb: u32 = ((a[0] * qa: u32 + b[0] * qb: u32) * m0i) & 0x7FFFFFFF;
	for (let k = 0z; k < alen; k += 1) {

		// In this loop, carries 'cca' and 'ccb' always fit on
		// 33 bits (in absolute value).
		let wa = a[k];
		let wb = b[k];
		let za: u64 = wa * pa: u64 + wb * pb: u64
			+ m[k] * fa: u64 + cca: u64;
		let zb: u64 = wa * qa: u64 + wb * qb: u64
			+ m[k] * fb: u64 + ccb: u64;
		if (k > 0) {
			a[k - 1] = za: word & 0x7FFFFFFF;
			b[k - 1] = zb: word & 0x7FFFFFFF;
		};

		const m: u64 = 1 << 32;
		let tta: u64 = za >> 31;
		let ttb: u64 = zb >> 31;
		tta = (tta ^ m) - m;
		ttb = (ttb ^ m) - m;
		// cca = *(int64_t *)&tta;
		// ccb = *(int64_t *)&ttb;
		cca = tta: i64; // TODO test this
		ccb = ttb: i64;
	};
	a[alen - 1] = cca: u32;
	b[alen - 1] = ccb: u32;

	// At this point:
	//   -m <= a < 2*m
	//   -m <= b < 2*m
	// (this is a case of Montgomery reduction)
	// The top word of 'a' and 'b' may have a 32-th bit set.
	// We may have to add or subtract the modulus.
	finish_mod(a, alen, m, (cca: u64 >> 63): u32);
	finish_mod(b, alen, m, (ccb: u64 >> 63): u32);
};

// Finish modular reduction. Rules on input parameters:
//
//   if neg = 1, then -m <= a < 0
//   if neg = 0, then 0 <= a < 2*m
//
// If neg = 0, then the top word of a[] may use 32 bits.
//
// Also, modulus m must be odd.
fn finish_mod(a: []word, alen: size, m: const []word, neg: u32) void = {
	// First pass: compare a (assumed nonnegative) with m.
	// Note that if the final word uses the top extra bit, then
	// subtracting m must yield a value less than 2^31, since we
	// assumed that a < 2*m.
	let cc: u32 = 0;
	for (let k = 0z; k < alen; k += 1) {
		const aw = a[k];
		const mw = m[k];
		cc = (aw - mw - cc) >> 31;
	};

	// At this point:
	//   if neg = 1, then we must add m (regardless of cc)
	//   if neg = 0 and cc = 0, then we must subtract m
	//   if neg = 0 and cc = 1, then we must do nothing
	const xm: u32 = -neg >> 1;
	const ym: u32 = -(neg | (1 - cc));
	cc = neg;
	for (let k = 0z; k < alen; k += 1) {
		let aw = a[k];
		const mw = (m[k] ^ xm) & ym;
		aw = aw - mw - cc;
		a[k] = aw & 0x7FFFFFFF;
		cc = aw >> 31;
	};
};

// Negate big integer conditionally. The value consists of 'len' words,
// with 31 bits in each word (the top bit of each word should be 0,
// except possibly for the last word). If 'ctl' is 1, the negation is
// computed; otherwise, if 'ctl' is 0, then the value is unchanged.
fn cond_negate(a: []word, alen: size, ctl: u32) void = {
	let cc = ctl;
	const xm: u32 = -ctl >> 1;
	for (let k = 0z; k < alen; k += 1) {
		let aw = a[k];
		aw = (aw ^ xm) + cc;
		a[k] = aw & 0x7FFFFFFF;
		cc = aw >> 31;
	};
};