Vectorised next_bytes(), next_v*().

XXX: next_bytes(std::array<u8, ...>) doesn't seem to play nice with aliasing rules.

Fortune for cpprng's current commit: Small blessing − 小吉
lib
Avril 3 years ago
parent acc580e031
commit 5f57115683
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -8,8 +8,7 @@ SRC_C = $(wildcard src/*.c) $(wildcard src/rng/*.c)
SRC_CXX = $(wildcard src/*.cpp) $(wildcard src/rng/*.cpp)
INCLUDE=include
COMMON_FLAGS+= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE))
COMMON_FLAGS+= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE))
OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \
-floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \

@ -137,6 +137,7 @@ struct Random
template<usize N>
inline void next_bytes(std::array<u8, N>& ar)
{
// XXX: this doesn't seem to work (aliasing issues?)
_next_bytes<N>(&ar[0]);
}
@ -176,9 +177,7 @@ protected:
/// Draw one value from `_sample()` and range-check it before handing it back.
/// Throws `InvalidRandomSample` (carrying the offending value) when the value is below 0 or above 1.
inline f64 sample()
{
	auto s = _sample();
	// One unconditional check; the DEBUG-only copy of the same test that used to precede it was redundant.
	if (UNLIKELY(s < 0 || s > 1)) throw InvalidRandomSample{ s };
	return s;
}
private:

@ -23,11 +23,15 @@ namespace rng
i32 next_i32() override;
u32 next_u32() override;
void next_bytes(u8* p, usize n) override;
protected:
// Overrides the base `_max_i64()` to report RANGE_MAX.
inline constexpr i64 _max_i64() const override { return RANGE_MAX; }
// Overrides the base `_max_u64()` to report RANGE_MAX converted to u64.
inline constexpr u64 _max_u64() const override { return (u64)RANGE_MAX; }
// the rest of the base `_max_*` functions are valid, as they will always be equal to or less than INT32_MAX (the upper bound of dr48.)
f64 _sample() override;
void next_v64(u64* p, usize n) override;
void next_v32(u32* p, usize n) override;
private:
struct _opaque;
struct _deleter { static void delete_object(_opaque** st); };

@ -113,6 +113,12 @@ internal void _jr_free(struct jr_state* restrict state)
free(state);
}
/* Expose the `unsigned short` pointer that `_jr_st_resolv` yields for this state's `st` member. */
_fspec(pure)
internal unsigned short* _jr_state(struct jr_state* restrict state)
{
	unsigned short* const resolved = _jr_st_resolv(&state->st);
	return resolved;
}
void __TEST__jr_test()
{
struct jr_state* st = _jr_alloc();

@ -1,3 +1,5 @@
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#include <bit>
#include <climits>
#include <cstring>
@ -16,7 +18,14 @@ constexpr const i64 _J_RANGE_MAX = (i64)INT32_MAX;
static_assert(_J_RANGE_MIN == rng::crand::RANGE_MIN);
static_assert(_J_RANGE_MAX == rng::crand::RANGE_MAX);
namespace {
	/// Write `val`, converted to `T`, at `ptr` and advance `ptr` by `sizeof(T)` bytes.
	///
	/// The bytes are copied with `std::memcpy` instead of being stored through a
	/// `reinterpret_cast<T*>`: `ptr` is an arbitrary byte pointer, so it may be
	/// misaligned for `T`, and no `T` object lives there to be assigned through.
	/// The value is written in the native object representation of `T`.
	template<typename T>
	inline void setinc(u8* &ptr, auto val)
	{
		const T converted = T(val);
		std::memcpy(ptr, &converted, sizeof(T));
		ptr += sizeof(T);
	}
}
namespace rng
{
// Free the state object that `*state` refers to via `_jr_free`, then clear the caller's pointer.
void crand::_deleter::delete_object(_opaque** state)
{
	auto* const native = reinterpret_cast<jr_state*>(*state);
	_jr_free(native);
	*state = nullptr;
}
@ -35,7 +44,30 @@ namespace rng
// Produce an i32 by converting the value returned from next_i64().
i32 crand::next_i32()
{
	const auto wide = next_i64();
	return static_cast<i32>(wide);
}
// Produce a u32 by reinterpreting the bits of an i32-converted sample.
// Author's note: keeping the sign bit in this transmute is thought not to violate the
// distribution, since the sample lies between int32's min and max value.
u32 crand::next_u32()
{
	const i32 narrowed = static_cast<i32>(_sample_int());
	return std::bit_cast<u32>(narrowed);
}
//TODO: next_bytes(), next_v*()
// next_bytes(), next_v*()
// Fill the `n` bytes starting at `b` with sample data.
//
// Each pass looks at `n % 4`: a remainder of 1, 2 or 3 is consumed first (one sample per
// 1- or 2-byte piece), after which the buffer is filled one 4-byte word per sample.
void crand::next_bytes(u8* b, usize n)
{
	while (n)
	{
		switch (n % 4)
		{
		case 0:
			// Keep all 32 low bits of the sample. Masking with _J_RANGE_MAX here forced
			// bit 31 of every word to zero, biasing one byte in four.
			// NOTE(review): assumes _sample_int() can carry information in bit 31 (i.e. may be negative) -- confirm.
			setinc<u32>(b, std::bit_cast<u64>(_sample_int()));
			n -= 4;
			break;
		case 3:
			setinc<u8>(b, std::bit_cast<u64>(_sample_int()) & 0xff);
			n -= 1;
			// Two bytes of the remainder are still pending; share the 2-byte case.
			[[fallthrough]];
		case 2:
			setinc<u16>(b, std::bit_cast<u64>(_sample_int()) & 0xffff);
			n -= 2;
			break;
		case 1:
			*b++ = u8(_sample_int() & 0xff);
			n -= 1;
			break;
		}
	}
}
// Write `n` values to `p`, each one a sample masked with INT32_MAX and converted to u32.
void crand::next_v32(u32* p, usize n)
{
	for (usize i = 0; i < n; ++i)
	{
		p[i] = static_cast<u32>(_sample_int() & INT32_MAX);
	}
}
// Write `n` values to `p`, each assembled from two samples masked with INT32_MAX:
// the first sample drawn becomes bits 0-30, the second becomes bits 32-62.
void crand::next_v64(u64* p, usize n)
{
	while (n-- > 0)
	{
		// Draw the halves in separate statements: as two operands of a single `+` their
		// evaluation order was unspecified, so the output could differ between compilers.
		const u64 low = u64(_sample_int() & INT32_MAX);
		const u64 high = u64(_sample_int() & INT32_MAX);
		*p++ = low + (high << 32);
	}
}
}
void rng_test()
@ -46,6 +78,15 @@ void rng_test()
printf("%d %d %d\n", r.next_i32(), r.next_i32(), r.next_i32());
printf("%u %u %u\n", r.next_u32(), r.next_u32(), r.next_u32());
union {
volatile u64 u;
u8 b[sizeof(u64)];
std::array<u8, sizeof(u64)> a;
} thing = {0};
r.next_bytes(thing.b, sizeof(u64));
printf("chaos: %lu, %lu, %lu\n", thing.u, (r.next_bytes(thing.a), thing.u), (r.next_bytes(thing.b), thing.u));
// TODO: these aren't implemented yet in the base Random huh...
printf("---\n%u %d %d %u\n", r.next_u32(10, 20), r.next_i32(10, 20), r.next_i32(10), r.next_u32(10));
}

@ -21,6 +21,7 @@ struct jr_state* _jr_new(unsigned long with) _export(internal) __attribute__((ma
double _jr_lastf(const struct jr_state* restrict state) _export(internal) _fspec(readonly);
long _jr_lastl(const struct jr_state* restrict state) _export(internal) _fspec(readonly);
unsigned short* _jr_state(struct jr_state* restrict state) _export(internal) _fspec(pure);
#ifdef __cplusplus
}

Loading…
Cancel
Save