You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
188 lines
5.6 KiB
188 lines
5.6 KiB
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
|
|
#include "crand.h"
|
|
|
|
// Rewrites the `internal` marker used on the definitions in this file as `_export(internal)`.
// NOTE(review): `_export` is not defined in this file; presumably it comes from "crand.h" — confirm.
#define internal _export(internal)
|
|
|
|
// Array of three `unsigned short`s; used as the `xsubi` view inside the `st` union of `struct jr_state`.
typedef unsigned short uint48_t[3];
|
|
|
|
struct jr_state
|
|
{
|
|
union {
|
|
long result;
|
|
double fresult;
|
|
};
|
|
struct drand48_data data;
|
|
union {
|
|
//unsigned short xsubi[3];
|
|
uint48_t xsubi;
|
|
struct {
|
|
uint64_t xsubh : 48;
|
|
uint16_t _xsub : 16;
|
|
};
|
|
uint64_t xsubl;
|
|
} st;
|
|
};
|
|
// Names the (otherwise anonymous) type of the `st` member of `struct jr_state`, so helpers can take a pointer to it.
typedef __typeof(((struct jr_state*)NULL)->st) jr_xsub_t;
|
|
|
|
_Static_assert( sizeof(uint48_t) == (sizeof(uint16_t) * 3), "bad uint48 (ushort[3])");
|
|
_Static_assert( sizeof(((struct jr_state*)NULL)->st) == sizeof(uint64_t), "bad uint64 (union st)");
|
|
|
|
// Declaration of `_jr_st_resolv`: takes a pointer to the `st` union and returns a pointer to
// `unsigned short`s inside it (see the `low` / `high` implementations).
// Declared `const`, `nonnull`, `returns_nonnull` and `access(none, 1)`: the function never
// dereferences `state`, it only derives an address from it.
// NOTE(review): `IFUNC_DEF` is defined outside this file — confirm its exact expansion.
inline static unsigned short* IFUNC_DEF(_jr_st_resolv, (jr_xsub_t* restrict state)
		__attribute__((const, nonnull, returns_nonnull, access(none, 1))));
|
|
|
|
// `low` implementation of `_jr_st_resolv`: the result is the address of the first
// `unsigned short` of the `xsubi` view.
inline static unsigned short* IFUNC_IMPL(_jr_st_resolv, low) (jr_xsub_t* restrict state)
{
	return &state->xsubi[0];
}
|
|
// `high` implementation of `_jr_st_resolv`: the result is the address of the second
// `unsigned short` of the `xsubi` view, i.e. one short past the start of the union.
// NOTE(review): presumably meant for layouts where the 48-bit field occupies the upper
// three shorts of the 64-bit cell — confirm.
inline static unsigned short* IFUNC_IMPL(_jr_st_resolv, high) (jr_xsub_t* restrict state)
{
	return &state->xsubi[1];
}
|
|
|
|
inline static __attribute__((artificial, always_inline, gnu_inline, const)) int _resv_is_high()
|
|
{
|
|
// This being `static const` initialised makes this function be seen as a proper constant expression. Nothing is leaked on to the stack of the caller and the function is replaced with a single `lea`.
|
|
static const struct jr_state chk = {
|
|
.st.xsubl = JR_MAX,
|
|
};
|
|
return chk.st._xsub
|
|
? 1
|
|
: 0;
|
|
}
|
|
|
|
__attribute__((const))
|
|
inline static unsigned short* IFUNC_RESOLVER(_jr_st_resolv) (jr_xsub_t* restrict state)
|
|
{
|
|
// This ifunc is free.
|
|
// The optimiser recognises the return value of this function at compile time, and discards the unused function, removing the need for any runtime ifunc resolution.
|
|
// The ifunc `_jr_st_resolv()` is essentially (almost) the same as a symbol-aliased constexpr function.
|
|
// Ain't that neat?
|
|
|
|
#ifdef _RNGXX_JR_RESOLV_IFUNC_OLD_STACKDYN_CHECK
|
|
// The old, dynamic stack alloc of the struct, instead of putting it in global r/o data
|
|
struct jr_state chk = {0};
|
|
chk.st.xsubl = JR_MAX;
|
|
#else
|
|
// The new, static const alloc of the struct, puts the value in at compile-time. Better optimisation opportunities.
|
|
// NOTE: Both stratergies make the ifunc resolve its target at compile-time, there is no difference in this ifunc resolver between the two. However, in `_resv_is_high()`, the old (this) stratergy prevents it being recognised as a constant-expression and stack-allocates the useless memory in the function's (macro-expanded) caller. Causing a spill of useless instructions that the new method changes to one `lea'.
|
|
static const struct jr_state chk = {
|
|
.st.xsubl = JR_MAX,
|
|
};
|
|
#endif
|
|
return chk.st._xsub
|
|
? & IFUNC_NAME(_jr_st_resolv, high)
|
|
: & IFUNC_NAME(_jr_st_resolv, low);
|
|
}
|
|
// `_jr_st_resolv_f` names the function that maps a `jr_xsub_t*` to the `unsigned short*`
// handed to the `*48_r()` calls.
#ifdef _RNGXX_JR_RESOLV_RUNTIME
// Runtime mode: go through the `_jr_st_resolv` symbol itself.
#define _jr_st_resolv_f _jr_st_resolv
#else
// Default mode: choose the implementation inline via `_resv_is_high()`.
#define _jr_st_resolv_f (_resv_is_high() \
			 ? & IFUNC_NAME(_jr_st_resolv, high) \
			 : & IFUNC_NAME(_jr_st_resolv, low))
// Author's note: this selection cannot initialise a file-scope `const` function pointer,
// because the compiler rejects it as "not a constant expr".

// In this mode, function-like uses of `_jr_st_resolv(...)` are routed through the inline selection.
#define _jr_st_resolv(st) _jr_st_resolv_f (st)
#endif
|
|
|
|
// Returns the `result` member of `state` without advancing the generator.
// `result` is the member `_jr_proc()` stores its output in.
_fspec(readonly) internal long _jr_lastl(const struct jr_state* restrict state)
{
	const long last = state->result;
	return last;
}
|
|
|
|
// Returns the `fresult` member of `state` without advancing the generator.
// `fresult` is the member `_jr_procf()` stores its output in.
_fspec(readonly) internal double _jr_lastf(const struct jr_state* restrict state)
{
	const double last = state->fresult;
	return last;
}
|
|
|
|
internal void _jr_seed(struct jr_state* restrict state, unsigned long with)
|
|
{
|
|
state->st.xsubh = with;
|
|
seed48_r(_jr_st_resolv(&state->st), &state->data);
|
|
}
|
|
|
|
internal long _jr_proc(struct jr_state* restrict state)
|
|
{
|
|
jrand48_r(_jr_st_resolv(&state->st), &state->data, &state->result);
|
|
return state->result;
|
|
}
|
|
|
|
internal double _jr_procf(struct jr_state* restrict state)
|
|
{
|
|
erand48_r(_jr_st_resolv(&state->st), &state->data, &state->fresult);
|
|
return state->fresult;
|
|
}
|
|
|
|
__attribute__((malloc(_jr_free)))
|
|
internal struct jr_state* _jr_alloc()
|
|
{
|
|
struct jr_state* bx = aligned_alloc(_Alignof(struct jr_state), sizeof(struct jr_state));
|
|
if(UNLIKELY(!bx)) return NULL;
|
|
|
|
memset(bx, 0, sizeof(struct jr_state));
|
|
|
|
return bx;
|
|
}
|
|
__attribute__((malloc(_jr_free)))
|
|
internal struct jr_state* _jr_new(unsigned long with)
|
|
{
|
|
struct jr_state* state = _jr_alloc();
|
|
if(UNLIKELY(!state)) return NULL;
|
|
|
|
_jr_seed(state, with);
|
|
return state;
|
|
}
|
|
|
|
// Releases a state with `free()`. Passing NULL is allowed and does nothing.
internal void _jr_free(struct jr_state* restrict state)
{
	// `free(NULL)` is defined to be a no-op, so no NULL guard is needed.
	free(state);
}
|
|
|
|
_fspec(pure)
|
|
internal unsigned short* _jr_state(struct jr_state* restrict state)
|
|
{
|
|
return _jr_st_resolv(&state->st);
|
|
}
|
|
|
|
void __TEST__jr_test()
|
|
{
|
|
struct jr_state* st = _jr_alloc();
|
|
assert(!st->st._xsub);
|
|
_jr_seed(st, ~0UL);
|
|
const volatile unsigned short* res_state = _jr_st_resolv(&st->st);
|
|
printf("seeded: %lu (full %lu, spill %u). xsubi = [%04x, %04x, %04x) %04x], resolv = [%04x, %04x, %04x) %04x]\n", (uint64_t)st->st.xsubh, st->st.xsubl, st->st._xsub,
|
|
st->st.xsubi[0],
|
|
st->st.xsubi[1],
|
|
st->st.xsubi[2],
|
|
st->st.xsubi[3],
|
|
|
|
res_state[0],
|
|
res_state[1],
|
|
res_state[2],
|
|
res_state[3]);
|
|
assert(!st->st._xsub);
|
|
for(int i=0;i<10;i++)
|
|
{
|
|
printf("res: %ld\n", _jr_proc(st));
|
|
printf("state: %lu (full %lu, spill %u). xsubi = %p, resolv = %p\n", (uint64_t)st->st.xsubh, st->st.xsubl, st->st._xsub,
|
|
(const void*)st->st.xsubi, (const void*)_jr_st_resolv(&st->st));
|
|
}
|
|
printf("ended: %lu (full %lu, spill %u). xsubi = [%04x, %04x, %04x) %04x], resolv = [%04x, %04x, %04x) %04x]\n", (uint64_t)st->st.xsubh, st->st.xsubl, st->st._xsub,
|
|
st->st.xsubi[0],
|
|
st->st.xsubi[1],
|
|
st->st.xsubi[2],
|
|
st->st.xsubi[3],
|
|
|
|
res_state[0],
|
|
res_state[1],
|
|
res_state[2],
|
|
res_state[3]);
|
|
assert(!st->st._xsub);
|
|
_jr_free(st);
|
|
}
|