#include #include #include #include #include #include "crand.h" #define internal _export(internal) typedef unsigned short uint48_t[3]; struct jr_state { union { long result; double fresult; }; struct drand48_data data; union { //unsigned short xsubi[3]; uint48_t xsubi; struct { uint64_t xsubh : 48; uint16_t _xsub : 16; }; uint64_t xsubl; } st; }; typedef __typeof(((struct jr_state*)NULL)->st) jr_xsub_t; _Static_assert( sizeof(uint48_t) == (sizeof(uint16_t) * 3), "bad uint48 (ushort[3])"); _Static_assert( sizeof(((struct jr_state*)NULL)->st) == sizeof(uint64_t), "bad uint64 (union st)"); inline static unsigned short* IFUNC_DEF(_jr_st_resolv, (jr_xsub_t* restrict state) __attribute__((const, nonnull, returns_nonnull, access(none, 1)))); inline static unsigned short* IFUNC_IMPL(_jr_st_resolv, low) (jr_xsub_t* restrict state) { return state->xsubi; } inline static unsigned short* IFUNC_IMPL(_jr_st_resolv, high) (jr_xsub_t* restrict state) { return state->xsubi+1; } inline static __attribute__((artificial, always_inline, gnu_inline, const)) int _resv_is_high() { // This being `static const` initialised makes this function be seen as a proper constant expression. Nothing is leaked on to the stack of the caller and the function is replaced with a single `lea`. static const struct jr_state chk = { .st.xsubl = JR_MAX, }; return chk.st._xsub ? 1 : 0; } __attribute__((const)) inline static unsigned short* IFUNC_RESOLVER(_jr_st_resolv) (jr_xsub_t* restrict state) { // This ifunc is free. // The optimiser recognises the return value of this function at compile time, and discards the unused function, removing the need for any runtime ifunc resolution. // The ifunc `_jr_st_resolv()` is essentially (almost) the same as a symbol-aliased constexpr function. // Ain't that neat? #ifdef _RNGXX_JR_RESOLV_IFUNC_OLD_STACKDYN_CHECK // The old, dynamic stack alloc of the struct, instead of putting it in global r/o data struct jr_state chk = {0}; chk.st.xsubl = JR_MAX; #else // The new, static const alloc of the struct, puts the value in at compile-time. Better optimisation opportunities. // NOTE: Both stratergies make the ifunc resolve its target at compile-time, there is no difference in this ifunc resolver between the two. However, in `_resv_is_high()`, the old (this) stratergy prevents it being recognised as a constant-expression and stack-allocates the useless memory in the function's (macro-expanded) caller. Causing a spill of useless instructions that the new method changes to one `lea'. static const struct jr_state chk = { .st.xsubl = JR_MAX, }; #endif return chk.st._xsub ? & IFUNC_NAME(_jr_st_resolv, high) : & IFUNC_NAME(_jr_st_resolv, low); } #ifndef _RNGXX_JR_RESOLV_RUNTIME #define _jr_st_resolv_f (_resv_is_high() \ ? & IFUNC_NAME(_jr_st_resolv, high) \ : & IFUNC_NAME(_jr_st_resolv, low)) //unsigned short* (*const _jr_st_resolv_fp)(jr_xsub_t* restrict state) = _jr_st_resolv_f; // "not a constant expr :// #define _jr_st_resolv(st) _jr_st_resolv_f (st) #else #define _jr_st_resolv_f _jr_st_resolv #endif _fspec(readonly) internal long _jr_lastl(const struct jr_state* restrict state) { return state->result; } _fspec(readonly) internal double _jr_lastf(const struct jr_state* restrict state) { return state->fresult; } internal void _jr_seed(struct jr_state* restrict state, unsigned long with) { state->st.xsubh = with; seed48_r(_jr_st_resolv(&state->st), &state->data); } internal long _jr_proc(struct jr_state* restrict state) { jrand48_r(_jr_st_resolv(&state->st), &state->data, &state->result); return state->result; } internal double _jr_procf(struct jr_state* restrict state) { erand48_r(_jr_st_resolv(&state->st), &state->data, &state->fresult); return state->fresult; } __attribute__((malloc(_jr_free))) internal struct jr_state* _jr_alloc() { struct jr_state* bx = aligned_alloc(_Alignof(struct jr_state), sizeof(struct jr_state)); if(UNLIKELY(!bx)) return NULL; memset(bx, 0, sizeof(struct jr_state)); return bx; } __attribute__((malloc(_jr_free))) internal struct jr_state* _jr_new(unsigned long with) { struct jr_state* state = _jr_alloc(); if(UNLIKELY(!state)) return NULL; _jr_seed(state, with); return state; } internal void _jr_free(struct jr_state* restrict state) { if(LIKELY(state)) free(state); } _fspec(pure) internal unsigned short* _jr_state(struct jr_state* restrict state) { return _jr_st_resolv(&state->st); } void __TEST__jr_test() { struct jr_state* st = _jr_alloc(); assert(!st->st._xsub); _jr_seed(st, ~0UL); const volatile unsigned short* res_state = _jr_st_resolv(&st->st); printf("seeded: %lu (full %lu, spill %u). xsubi = [%04x, %04x, %04x) %04x], resolv = [%04x, %04x, %04x) %04x]\n", (uint64_t)st->st.xsubh, st->st.xsubl, st->st._xsub, st->st.xsubi[0], st->st.xsubi[1], st->st.xsubi[2], st->st.xsubi[3], res_state[0], res_state[1], res_state[2], res_state[3]); assert(!st->st._xsub); for(int i=0;i<10;i++) { printf("res: %ld\n", _jr_proc(st)); printf("state: %lu (full %lu, spill %u). xsubi = %p, resolv = %p\n", (uint64_t)st->st.xsubh, st->st.xsubl, st->st._xsub, (const void*)st->st.xsubi, (const void*)_jr_st_resolv(&st->st)); } printf("ended: %lu (full %lu, spill %u). xsubi = [%04x, %04x, %04x) %04x], resolv = [%04x, %04x, %04x) %04x]\n", (uint64_t)st->st.xsubh, st->st.xsubl, st->st._xsub, st->st.xsubi[0], st->st.xsubi[1], st->st.xsubi[2], st->st.xsubi[3], res_state[0], res_state[1], res_state[2], res_state[3]); assert(!st->st._xsub); _jr_free(st); }