Vectorised next_bytes(), next_v*().

XXX: next_bytes(std::array<u8, ...>) doesn't seem to play nice with aliasing rules. Fortune for cpprng's current commit: Small blessing − 小吉
4 years ago · 5f57115683
parent acc580e031
commit 5f57115683
6 changed files with 57 additions and 7 deletions
--- a/3
+++ b/3
@@ -8,8 +8,7 @@ SRC_C   = $(wildcard src/*.c) $(wildcard src/rng/*.c)
 SRC_CXX = $(wildcard src/*.cpp) $(wildcard src/rng/*.cpp)

 INCLUDE=include
-
-COMMON_FLAGS+= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE))
+COMMON_FLAGS+= -W -Wall -pedantic  -fno-strict-aliasing $(addprefix -I,$(INCLUDE))

 OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \
 	    -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \
--- a/include/rng.h
+++ b/include/rng.h
@@ -137,6 +137,7 @@ struct Random
 	template<usize N>
 	inline void next_bytes(std::array<u8, N>& ar)
 	{
+		// XXX: this doesn't seem to work (aliasing issues?)
 		_next_bytes<N>(&ar[0]);
 	}

@@ -176,9 +177,7 @@ protected:
 	inline f64 sample() 
 	{
 		auto s = _sample();
-#ifdef DEBUG
-		if (s < 0 || s > 1) throw InvalidRandomSample{ s };
-#endif
+		if (UNLIKELY(s < 0 || s > 1)) throw InvalidRandomSample{ s };
 		return s;
 	}
 private:
--- a/include/rng/crand.h
+++ b/include/rng/crand.h
@@ -23,11 +23,15 @@ namespace rng

 		i32 next_i32() override;
 		u32 next_u32() override;
+
+		void next_bytes(u8* p, usize n) override;
 	protected:
 		inline constexpr i64 _max_i64() const override { return RANGE_MAX; }
 		inline constexpr u64 _max_u64() const override { return (u64)RANGE_MAX; }
 		// the rest of the base `_max_*` functions are valid, as they will always be equal to or less than INT32_MAX (the upper bound of dr48.) 
 		f64 _sample() override;
+		void next_v64(u64* p, usize n) override;
+		void next_v32(u32* p, usize n) override;
 	private:
 		struct _opaque;
 		struct _deleter { static void delete_object(_opaque** st); };
--- a/src/rng/crand.c
+++ b/src/rng/crand.c
@@ -113,6 +113,12 @@ internal void _jr_free(struct jr_state* restrict state)
 		free(state);
 }

+_fspec(pure)
+internal unsigned short* _jr_state(struct jr_state* restrict state)
+{
+	return _jr_st_resolv(&state->st);
+}
+
 void __TEST__jr_test()
 {
 	struct jr_state* st = _jr_alloc();
--- a/src/rng/crand.cpp
+++ b/src/rng/crand.cpp
@@ -1,3 +1,5 @@
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+
 #include <bit>

 #include <climits>
@@ -16,7 +18,14 @@ constexpr const i64 _J_RANGE_MAX = (i64)INT32_MAX;

 static_assert(_J_RANGE_MIN == rng::crand::RANGE_MIN);
 static_assert(_J_RANGE_MAX == rng::crand::RANGE_MAX);
-
+namespace {
+	template<typename T>
+	inline void setinc(u8* &ptr, auto val)
+	{
+		*reinterpret_cast<T*>(ptr) = T(val);
+		ptr += sizeof(T);
+	}
+}
 namespace rng
 {	
 	void crand::_deleter::delete_object(_opaque** state) { _jr_free(reinterpret_cast<jr_state*>(*state)); *state = nullptr; }
@@ -35,7 +44,30 @@ namespace rng
 	i32 crand::next_i32() { return (i32)next_i64(); }
 	u32 crand::next_u32() { return std::bit_cast<u32>((i32)_sample_int()); } // I think keeping the sign bit in the transmute here doesn't violate the distribution, since it's between int32's min and max value...

-	//TODO: next_bytes(), next_v*()
+	// next_bytes(), next_v*()
+	void crand::next_bytes(u8* b, usize n)
+	{
+
+		while(n)
+			switch (n % 4) // this is soo dogy...
+			{
+				case 0: setinc<u32>(b, std::bit_cast<u64>(_sample_int() & _J_RANGE_MAX)); n-=4; break;
+				case 3: setinc<u8>(b, std::bit_cast<u64>(_sample_int()) & 0xff); n -= 1;
+				case 2: setinc<u16>(b, std::bit_cast<u64>(_sample_int()) & 0xffff); n -= 2;
+					break;
+				case 1: *b++ = u8(_sample_int() & 0xff); n -= 1; break;
+			}
+		
+	}
+	void crand::next_v32(u32* p, usize n)
+	{
+		while( n --> 0 ) *p++ = (u32)(_sample_int() & INT32_MAX);
+	}
+	void crand::next_v64(u64* p, usize n)
+	{
+		while( n --> 0) *p++ = u64(_sample_int() & INT32_MAX) + (u64(_sample_int() & INT32_MAX) << 32);
+	}
+
 }

 void rng_test()
@@ -46,6 +78,15 @@ void rng_test()
 	printf("%d %d %d\n", r.next_i32(), r.next_i32(), r.next_i32());
 	printf("%u %u %u\n", r.next_u32(), r.next_u32(), r.next_u32());

+	union {
+		volatile u64 u;
+		u8 b[sizeof(u64)];
+		std::array<u8, sizeof(u64)> a;
+	} thing = {0};
+
+	r.next_bytes(thing.b, sizeof(u64));
+	printf("chaos: %lu, %lu, %lu\n", thing.u, (r.next_bytes(thing.a), thing.u), (r.next_bytes(thing.b), thing.u));
+
 	// TODO: these aren't implemented yet in the base Random huh...
 	printf("---\n%u %d %d %u\n", r.next_u32(10, 20), r.next_i32(10, 20), r.next_i32(10), r.next_u32(10));
 }
--- a/src/rng/crand.h
+++ b/src/rng/crand.h
@@ -21,6 +21,7 @@ struct jr_state* _jr_new(unsigned long with) _export(internal) __attribute__((ma

 double _jr_lastf(const struct jr_state* restrict state) _export(internal) _fspec(readonly);
 long   _jr_lastl(const struct jr_state* restrict state) _export(internal) _fspec(readonly);
+unsigned short* _jr_state(struct jr_state* restrict state) _export(internal) _fspec(pure);

 #ifdef __cplusplus
 }