// Patch summary (free-form preamble; everything before the first "diff --git" header is commentary).
//
// Makefile
//   * COMMON_FLAGS is now assigned with "+=" instead of "=", so the warning/include flags are
//     appended to whatever COMMON_FLAGS already holds rather than replacing it.
//   * The default DEBUG_CXXFLAGS changes from "-O0 -g" to "-Og -g", the same value the hunk shows
//     for DEBUG_CFLAGS.
//
// include/common.h
//   * Adds LIKELY(expr) / UNLIKELY(expr), both thin wrappers over __builtin_expect(!!(expr), 1 or 0).
//
// src/aligned_ptr.cpp
//   * Adds one #include and wraps the null test in not_null() with UNLIKELY(); the thrown string is
//     unchanged.
//
// src/rng/crand.c
//   * Marks the _jr_st_resolv ifunc declaration, its "low"/"high" implementations and its resolver
//     as "inline static", and adds a comment explaining the author's expectation that the resolver
//     is folded at compile time.
//   * _jr_alloc() now returns NULL when aligned_alloc() fails instead of passing NULL to memset().
//   * _jr_new() now returns NULL when _jr_alloc() fails instead of passing NULL to _jr_seed().
//   * _jr_free() now calls free() only when its argument is non-NULL.
//
// NOTE(review): free(NULL) is already defined as a no-op, so the new LIKELY(state) guard in
//   _jr_free() looks redundant -- confirm whether it is intentional.
// NOTE(review): with "+=", COMMON_FLAGS will pick up any value inherited from the environment or the
//   make command line -- presumably that is the intent; verify against the build scripts.
// NOTE(review): this copy of the patch appears to have lost its original line breaks, and the
//   #include targets and the template parameter list in src/aligned_ptr.cpp seem to be missing --
//   presumably an extraction artefact; re-export the patch before trying to apply it.
diff --git a/Makefile b/Makefile index 65a1329..595c25f 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ SRC_CXX = $(wildcard src/*.cpp) $(wildcard src/rng/*.cpp) INCLUDE=include -COMMON_FLAGS= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE)) +COMMON_FLAGS+= -W -Wall -pedantic -fno-strict-aliasing $(addprefix -I,$(INCLUDE)) OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \ -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \ @@ -32,7 +32,7 @@ ifneq ($(TARGET_SPEC_FLAGS),no) RELEASE_LDFLAGS?= -Wl,-O3 -Wl,-flto DEBUG_CFLAGS?= -Og -g - DEBUG_CXXFLAGS?=-O0 -g + DEBUG_CXXFLAGS?=-Og -g DEBUG_LDFLAGS?= endif diff --git a/include/common.h b/include/common.h index 3b8a089..2433d34 100644 --- a/include/common.h +++ b/include/common.h @@ -15,6 +15,9 @@ extern "C" { #define IFUNC_RESOLVER(name) IFUNC_RESOLVER_A((copy(name)), name) #define IFUNC_DEF(name, params) name params __attribute__((__ifunc__("_ifun__" #name))) +#define LIKELY(expr) __builtin_expect(!!(expr), 1) +#define UNLIKELY(expr) __builtin_expect(!!(expr), 0) + #define _export(kind) __attribute__((visibility(#kind))) #define v_internal _export(internal) #define v_protected _export(protected) diff --git a/src/aligned_ptr.cpp b/src/aligned_ptr.cpp index 8179a7d..943d740 100644 --- a/src/aligned_ptr.cpp +++ b/src/aligned_ptr.cpp @@ -2,12 +2,13 @@ #include +#include #include template static inline T* not_null(T* ptr) { - if(!ptr) throw "TODO: Nullptr except"; + if(UNLIKELY(!ptr)) throw "TODO: Nullptr except"; return ptr; } diff --git a/src/rng/crand.c b/src/rng/crand.c index a5a46c4..c572fba 100644 --- a/src/rng/crand.c +++ b/src/rng/crand.c @@ -32,20 +32,25 @@ typedef __typeof(((struct jr_state*)NULL)->st) jr_xsub_t; _Static_assert( sizeof(uint48_t) == (sizeof(uint16_t) * 3), "bad uint48 (ushort[3])"); _Static_assert( sizeof(((struct jr_state*)NULL)->st) == sizeof(uint64_t), "bad uint64 (union st)"); -static unsigned short* 
IFUNC_DEF(_jr_st_resolv, (jr_xsub_t* restrict state) +inline static unsigned short* IFUNC_DEF(_jr_st_resolv, (jr_xsub_t* restrict state) __attribute__((const, nonnull, returns_nonnull))); -static unsigned short* IFUNC_IMPL(_jr_st_resolv, low) (jr_xsub_t* restrict state) +inline static unsigned short* IFUNC_IMPL(_jr_st_resolv, low) (jr_xsub_t* restrict state) { return state->xsubi; } -static unsigned short* IFUNC_IMPL(_jr_st_resolv, high) (jr_xsub_t* restrict state) +inline static unsigned short* IFUNC_IMPL(_jr_st_resolv, high) (jr_xsub_t* restrict state) { return state->xsubi+1; } __attribute__((const)) -static unsigned short* IFUNC_RESOLVER(_jr_st_resolv) (jr_xsub_t* restrict state) +inline static unsigned short* IFUNC_RESOLVER(_jr_st_resolv) (jr_xsub_t* restrict state) { + // This ifunc is free. + // The optimiser recognises the return value of this function at compile time, and discards the unused function, removing the need for any runtime ifunc resolution. + // The ifunc `_jr_st_resolv()` is essentially (almost) the same as a symbol-aliased constexpr function. + // Ain't that neat? + struct jr_state chk = {0}; chk.st.xsubh = JR_MAX; return chk.st._xsub @@ -75,6 +80,8 @@ __attribute__((malloc(_jr_free))) internal struct jr_state* _jr_alloc() { struct jr_state* bx = aligned_alloc(_Alignof(struct jr_state), sizeof(struct jr_state)); + if(UNLIKELY(!bx)) return NULL; + memset(bx, 0, sizeof(struct jr_state)); return bx; @@ -83,13 +90,16 @@ __attribute__((malloc(_jr_free))) internal struct jr_state* _jr_new(unsigned long with) { struct jr_state* state = _jr_alloc(); + if(UNLIKELY(!state)) return NULL; + _jr_seed(state, with); return state; } internal void _jr_free(struct jr_state* restrict state) { - free(state); + if(LIKELY(state)) + free(state); } void __TEST__jr_test()