diff --git a/Makefile b/Makefile index 595c25f..86d7d15 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,7 @@ SRC_C = $(wildcard src/*.c) $(wildcard src/rng/*.c) SRC_CXX = $(wildcard src/*.cpp) $(wildcard src/rng/*.cpp) INCLUDE=include - -COMMON_FLAGS+= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE)) +COMMON_FLAGS+= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE)) OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \ -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \ diff --git a/include/rng.h b/include/rng.h index 0741886..691d02a 100644 --- a/include/rng.h +++ b/include/rng.h @@ -137,6 +137,7 @@ struct Random template inline void next_bytes(std::array& ar) { + // XXX: this doesn't seem to work (aliasing issues?) _next_bytes(&ar[0]); } @@ -176,9 +177,7 @@ protected: inline f64 sample() { auto s = _sample(); -#ifdef DEBUG - if (s < 0 || s > 1) throw InvalidRandomSample{ s }; -#endif + if (UNLIKELY(s < 0 || s > 1)) throw InvalidRandomSample{ s }; /* range check is no longer guarded by #ifdef DEBUG, so it runs in every build */ return s; } private: diff --git a/include/rng/crand.h b/include/rng/crand.h index 71ce921..d52b5ef 100644 --- a/include/rng/crand.h +++ b/include/rng/crand.h @@ -23,11 +23,15 @@ namespace rng i32 next_i32() override; u32 next_u32() override; + + void next_bytes(u8* p, usize n) override; protected: inline constexpr i64 _max_i64() const override { return RANGE_MAX; } inline constexpr u64 _max_u64() const override { return (u64)RANGE_MAX; } // the rest of the base `_max_*` functions are valid, as they will always be equal to or less than INT32_MAX (the upper bound of dr48.) 
f64 _sample() override; + void next_v64(u64* p, usize n) override; + void next_v32(u32* p, usize n) override; private: struct _opaque; struct _deleter { static void delete_object(_opaque** st); }; diff --git a/src/rng/crand.c b/src/rng/crand.c index e816102..198d6e4 100644 --- a/src/rng/crand.c +++ b/src/rng/crand.c @@ -113,6 +113,12 @@ internal void _jr_free(struct jr_state* restrict state) free(state); } +_fspec(pure) +internal unsigned short* _jr_state(struct jr_state* restrict state) +{ + return _jr_st_resolv(&state->st); +} + void __TEST__jr_test() { struct jr_state* st = _jr_alloc(); diff --git a/src/rng/crand.cpp b/src/rng/crand.cpp index 899d92d..feab84d 100644 --- a/src/rng/crand.cpp +++ b/src/rng/crand.cpp @@ -1,3 +1,5 @@ +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" + #include #include @@ -16,7 +18,14 @@ constexpr const i64 _J_RANGE_MAX = (i64)INT32_MAX; static_assert(_J_RANGE_MIN == rng::crand::RANGE_MIN); static_assert(_J_RANGE_MAX == rng::crand::RANGE_MAX); - +namespace { + template + inline void setinc(u8* &ptr, auto val) + { + *reinterpret_cast(ptr) = T(val); /* NOTE(review): stores a T through the u8 cursor; nothing here checks that ptr is suitably aligned for T -- confirm */ + ptr += sizeof(T); /* ptr is taken by reference, so the caller's cursor advances past the stored value */ + } +} namespace rng { void crand::_deleter::delete_object(_opaque** state) { _jr_free(reinterpret_cast(*state)); *state = nullptr; } @@ -35,7 +44,30 @@ namespace rng i32 crand::next_i32() { return (i32)next_i64(); } u32 crand::next_u32() { return std::bit_cast((i32)_sample_int()); } // I think keeping the sign bit in the transmute here doesn't violate the distribution, since it's between int32's min and max value... - //TODO: next_bytes(), next_v*() + // Bulk output overrides: next_bytes(), next_v32(), next_v64() + void crand::next_bytes(u8* b, usize n) + { + + while(n) + switch (n % 4) // XXX: this is so dodgy... dispatches on the remaining length mod 4; case 3 has no break and falls through into case 2 (the file silences -Wimplicit-fallthrough for this)
+ { + case 0: setinc(b, std::bit_cast(_sample_int() & _J_RANGE_MAX)); n-=4; break; /* sample is masked with _J_RANGE_MAX (INT32_MAX), so bit 31 of the stored value is never set */ + case 3: setinc(b, std::bit_cast(_sample_int()) & 0xff); n -= 1; /* no break: falls through into case 2 */ + case 2: setinc(b, std::bit_cast(_sample_int()) & 0xffff); n -= 2; + break; + case 1: *b++ = u8(_sample_int() & 0xff); n -= 1; break; + } + + } + void crand::next_v32(u32* p, usize n) + { + while( n --> 0 ) *p++ = (u32)(_sample_int() & INT32_MAX); /* masked with INT32_MAX: bit 31 of every output word is 0 */ + } + void crand::next_v64(u64* p, usize n) + { + while( n --> 0) *p++ = u64(_sample_int() & INT32_MAX) + (u64(_sample_int() & INT32_MAX) << 32); /* two masked samples per word, so bits 31 and 63 are always 0; NOTE(review): the two _sample_int() calls are operands of +, so which one fills the low half is unspecified */ + } + } void rng_test() @@ -46,6 +78,15 @@ void rng_test() printf("%d %d %d\n", r.next_i32(), r.next_i32(), r.next_i32()); printf("%u %u %u\n", r.next_u32(), r.next_u32(), r.next_u32()); + union { + volatile u64 u; + u8 b[sizeof(u64)]; + std::array a; + } thing = {0}; + + r.next_bytes(thing.b, sizeof(u64)); + printf("chaos: %lu, %lu, %lu\n", thing.u, (r.next_bytes(thing.a), thing.u), (r.next_bytes(thing.b), thing.u)); /* NOTE(review): function-argument evaluation order is unspecified, so the order of the two next_bytes() calls relative to the reads of thing.u is not defined */ + // TODO: these aren't implemented yet in the base Random huh... printf("---\n%u %d %d %u\n", r.next_u32(10, 20), r.next_i32(10, 20), r.next_i32(10), r.next_u32(10)); } diff --git a/src/rng/crand.h b/src/rng/crand.h index faaf6b7..4fe68d1 100644 --- a/src/rng/crand.h +++ b/src/rng/crand.h @@ -21,6 +21,7 @@ struct jr_state* _jr_new(unsigned long with) _export(internal) __attribute__((ma double _jr_lastf(const struct jr_state* restrict state) _export(internal) _fspec(readonly); long _jr_lastl(const struct jr_state* restrict state) _export(internal) _fspec(readonly); +unsigned short* _jr_state(struct jr_state* restrict state) _export(internal) _fspec(pure); #ifdef __cplusplus }