~q3cpma/mus

a117fc702a6ffee1a896dd0fbe22822c1bf40738 — q3cpma 1 year, 2 months ago f2345ed
Switch from musl's drand48 implementation to xoshiro128+ as it is simpler,
faster in microbenchmarks and doesn't output double when we want float.

Use a fixed random seed because dither noise doesn't need to differ from run to run.

Use -Ofast for mus_player if available.
10 files changed, 60 insertions(+), 72 deletions(-)

M mus_client
M mus_player/build_player.sh
M mus_player/filter.c
M mus_player/misc.c
M mus_player/misc.h
M mus_player/mus_player.c
D mus_player/musl_rand48.c
D mus_player/musl_rand48.h
A mus_player/xoshiro128plus.c
A mus_player/xoshiro128plus.h
M mus_client => mus_client +1 -2
@@ 1,8 1,7 @@
#!/bin/sh
# Dependencies: flock(1)
# Portability:	GNU, *BSD, MacOS
# Portability:	GNU, *BSD (minus Open)
set -eu
rpath_bin=$(dirname -- "$0")
rpath_cbin=$(dirname -- "$0")/mus_player
rpath_share=$(dirname -- "$0")
. "$rpath_share"/mus_util.sh

M mus_player/build_player.sh => mus_player/build_player.sh +1 -1
@@ 140,7 140,7 @@ case "$(tolower "$CONFIG")" in
		append_cppflag -DNDEBUG
		;;
	release)
		append_cflag -O3
		test_append_cflag -Ofast || append_cflag -O3
		append_cppflag -DNDEBUG
		append_ldflag -s
		test_append_ldflag -Wl,-O1

M mus_player/filter.c => mus_player/filter.c +2 -2
@@ 2,7 2,7 @@

#include "filter.h"
#include "misc.h"
#include "musl_rand48.h"
#include "xoshiro128plus.h"


void interleave16_stereo(const int32_t *restrict inbuf[],


@@ 19,7 19,7 @@ void interleave16_stereo(const int32_t *restrict inbuf[],
static inline int16_t triangle_dither(float x)
{
	static float prev_rand = 0.f;
	const float rand = _drand48();
	const float rand = frand();
	const float tmp = x * INT16_HEADROOM_MULT + rand - prev_rand;
	prev_rand = rand;
	return lrintf(tmp);

M mus_player/misc.c => mus_player/misc.c +0 -11
@@ 15,7 15,6 @@

#include "log.h"
#include "misc.h"
#include "musl_rand48.h"


bool isbigendian(void)


@@ 216,13 215,3 @@ void * xmalloc(size_t size)
		LOG_DIE("xmalloc(%zu)", size);
	return p;
}

/* Seed the rand48 generator with sizeof(long) bytes read from /dev/urandom.
 * Dies through LOG_DIE if the bytes cannot be read in full. */
void rand_init(void)
{
	long int seed;
	const int urandom_fd = xopen("/dev/urandom", O_RDONLY);

	if (!read_full(urandom_fd, &seed, sizeof seed))
		LOG_DIE("/dev/urandom: %s", strerror(errno));

	_srand48(seed);
	xclose(urandom_fd);
}

M mus_player/misc.h => mus_player/misc.h +0 -3
@@ 76,6 76,3 @@ void *xstrdup(const char *s);

/* malloc() that aborts instead of returning NULL */
void *xmalloc(size_t size);

/* Initialize some PRNG seeds using /dev/urandom */
void rand_init(void);

M mus_player/mus_player.c => mus_player/mus_player.c +0 -1
@@ 120,7 120,6 @@ int main(int argc, char **argv)
{
	signals_nointerrupt();
	log_init(LOG_TYPE_STDERR, PROG_NAME);
	rand_init();

	ReplayGain_type rgtype = ALBUM_GAIN;
	char *ao_dri_name = NULL, *notify_cmd = NULL, *ipc_sock = NULL;

D mus_player/musl_rand48.c => mus_player/musl_rand48.c +0 -46
@@ 1,46 0,0 @@
#include <stdint.h>
#include <string.h>

#include "musl_rand48.h"


/* Generator data stored as 16-bit words:
 *   [0..2]  current state, handed to __rand48_step() as xi
 *   [3..5]  multiplier words, read by __rand48_step() as lc[0..2]
 *   [6]     addend, read by __rand48_step() as lc[3]
 * (_erand48() passes __seed48 + 3 as lc.) */
static unsigned short __seed48[7] = { 0, 0, 0, 0xe66d, 0xdeec, 0x5, 0xb };

/* Replace the three state words of __seed48 with the three words at s.
 * Returns a pointer to a static buffer holding the state that was just
 * replaced; that buffer is overwritten by the next call. */
static unsigned short *_seed48(unsigned short *s)
{
	static unsigned short previous[3];
	const size_t state_bytes = sizeof previous;

	memcpy(previous, __seed48, state_bytes);
	memcpy(__seed48, s, state_bytes);

	return previous;
}

/* Seed the generator: the low state word becomes 0x330e, the other two
 * words are taken from bits 0-15 and 16-31 of seed. */
void _srand48(long seed)
{
	unsigned short words[3] = {
		0x330e,
		(unsigned short)seed,
		(unsigned short)(seed >> 16),
	};

	_seed48(words);
}

/* Advance a 48-bit linear congruential generator by one step.
 *
 * xi: three 16-bit words holding the state, least significant word first;
 *     updated in place with the new state.
 * lc: lc[0..2] are the multiplier words (least significant first) and
 *     lc[3] is the addend.
 *
 * Returns the new state, truncated to 48 bits.
 *
 * The words must be shifted into position before being OR-ed together;
 * shifting the constant 0 instead (as in "w + (0U<<16)") leaves every word
 * in the low 16 bits and destroys both the state and the multiplier. */
static uint64_t __rand48_step(unsigned short *xi, unsigned short *lc)
{
	uint64_t x = (uint64_t)xi[0]
	           | ((uint64_t)xi[1] << 16)
	           | ((uint64_t)xi[2] << 32);
	const uint64_t a = (uint64_t)lc[0]
	                 | ((uint64_t)lc[1] << 16)
	                 | ((uint64_t)lc[2] << 32);

	x = a * x + lc[3];

	/* Store the low 48 bits back, 16 bits per word. */
	xi[0] = (unsigned short)x;
	xi[1] = (unsigned short)(x >> 16);
	xi[2] = (unsigned short)(x >> 32);

	return x & 0xffffffffffffull;
}

/* Step the generator whose state is s and turn the result into a double.
 * The step result is shifted left by 4 and OR-ed into the bit pattern
 * 0x3ff0000000000000; the pattern is reinterpreted as a double and 1.0 is
 * subtracted from it. */
static double _erand48(unsigned short s[3])
{
	union {
		uint64_t u;
		double f;
	} bits;
	const uint64_t fraction = __rand48_step(s, __seed48 + 3) << 4;

	bits.u = 0x3ff0000000000000ULL | fraction;
	return bits.f - 1.0;
}

/* Return the next double from the generator, stepping the file-level
 * state stored in __seed48. */
double _drand48(void)
{
	unsigned short *const state = __seed48;

	return _erand48(state);
}

D mus_player/musl_rand48.h => mus_player/musl_rand48.h +0 -6
@@ 1,6 0,0 @@
#pragma once
/* Copy pasted from musl libc 6ad514e4e278f0c3b18eb2db1d45638c9af1c07f
 * (2019-08-18) to make triangle_dither faster (the compiler can do
 * interprocedural optimizations like inlining with it) */
void _srand48(long seed);
double _drand48(void);

A mus_player/xoshiro128plus.c => mus_player/xoshiro128plus.c +27 -0
@@ 0,0 1,27 @@
#include "xoshiro128plus.h"

/* Rotate the 32-bit value x left by k bits.
 * Both shift counts are reduced modulo 32 so that no shift by the full type
 * width (undefined behaviour in C) can occur: k == 0 and k == 32 return x
 * unchanged. Results for k in 1..31 are the plain rotation. */
static inline uint32_t rotl(const uint32_t x, int k) {
	const unsigned int left = (unsigned int)k & 31u;
	const unsigned int right = (32u - left) & 31u;

	return (x << left) | (x >> right);
}


/* Generator state: four 32-bit words read and updated by next().
   The seed is fixed at compile time; per the xoshiro128+ notes the state
   must not be zero everywhere.

   Random values found with
   $ od -An -N16 -t u4 /dev/random
   on a linux-5.8.10 system */
static uint32_t s[4] = {1843353603, 1301329321, 769380932, 502473458};

uint32_t next(void) {
	const uint32_t result = s[0] + s[3];
	const uint32_t t = s[1] << 9;

	s[2] ^= s[0];
	s[3] ^= s[1];
	s[1] ^= s[2];
	s[0] ^= s[3];

	s[2] ^= t;

	s[3] = rotl(s[3], 11);

	return result;
}

A mus_player/xoshiro128plus.h => mus_player/xoshiro128plus.h +29 -0
@@ 0,0 1,29 @@
/*  Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)

To the extent possible under law, the author has dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.

See <http://creativecommons.org/publicdomain/zero/1.0/>. */

/* This is xoshiro128+ 1.0, our best and fastest 32-bit generator for 32-bit
   floating-point numbers. We suggest to use its upper bits for
   floating-point generation, as it is slightly faster than xoshiro128**.
   It passes all tests we are aware of except for
   linearity tests, as the lowest four bits have low linear complexity, so
   if low linear complexity is not considered an issue (as it is usually
   the case) it can be used to generate 32-bit outputs, too.

   We suggest to use a sign test to extract a random Boolean value, and
   right shifts to extract subsets of bits.

   The state must be seeded so that it is not everywhere zero. */

#include <stdint.h>

uint32_t next(void);

/* Return a float built from the upper 24 bits of next(): the low 8 bits are
 * shifted out and the remaining integer is scaled by 2^-24. */
static inline float frand(void)
{
	const uint32_t upper24 = next() >> 8;

	return upper24 * 0x1.0p-24f;
}