diff --git a/README.md b/README.md index 93779e1..b95032e 100644 --- a/README.md +++ b/README.md @@ -80,10 +80,11 @@ To disable stripping of release build binaries, run with `make STRIP=: release` ### Compile-time flags There are some build-time flags you can switch while building by appending to the `FEATURE_FLAGS` variable. -| Flag | Description | -|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `DEBUG` | Pretend we're building a debug release even though we're not. | -| `_FS_SPILL_BUFFER` | Spill buffers into a file if they grow over a threshold. Can cause massive slowdowns but prevent OOMs while unshuffling on systems with low available memory. See [shuffle3.h](./include/shuffle3.h) for more details | +| Flag | Description | +|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `DEBUG` | Pretend we're building a debug release even though we're not. | +| `_FS_SPILL_BUFFER` | Spill buffers into a file if they grow over a threshold. Can cause massive slowdowns but prevent OOMs while unshuffling on systems with low available memory. See [shuffle3.h](./include/shuffle3.h) for more details | +| `_FS_SPILL_BUFFER=DYN` | Same as `_FS_SPILL_BUFFER`, except the in-memory buffer is allocated dynamically (sized to the input when that allocation is possible) instead of having a fixed size. Might be faster. 
| ## Gentoo ebuild diff --git a/include/fsvec.hpp b/include/fsvec.hpp index e399e1a..269b461 100644 --- a/include/fsvec.hpp +++ b/include/fsvec.hpp @@ -107,7 +107,9 @@ struct fixed_spill_vector : public i_back_inserter { constexpr const static std::size_t SPILL_AT = Spill; - inline fixed_spill_vector() : mem(std::make_unique >()){} + inline fixed_spill_vector() : mem(std::make_unique >()){ + D_dprintf("alloc cap (static): %lu", Spill); + } inline fixed_spill_vector(const fixed_spill_vector& c) = delete; inline fixed_spill_vector(fixed_spill_vector&& m) : mem(std::move(m.mem)), @@ -161,3 +163,50 @@ private: std::unique_ptr> mem; file_vector fil; }; + +template +struct dynamic_spill_vector : public i_back_inserter +{ + inline dynamic_spill_vector() : dynamic_spill_vector(FSV_DEFAULT_SPILL_AT){} + inline dynamic_spill_vector(std::size_t cap) : dynamic_spill_vector(cap, cap){} + inline dynamic_spill_vector(std::size_t cap, std::size_t spill) : _spill_at(spill), mem(std::vector()), fil(file_vector(cap)) { + mem.reserve(cap); + D_dprintf("alloc cap %lu (sz %lu == 0?), spill %lu", cap, mem.size(), spill_at()); + } + inline dynamic_spill_vector(const dynamic_spill_vector& c) = delete; + inline dynamic_spill_vector(dynamic_spill_vector&& m) : + _spill_at(m._spill_at), + mem(std::move(m.mem)), + fil(std::move(m.fil)){} + + inline void push_back(T&& value) override + { + if(size()>=spill_at()) { + D_dprintf("Spilling: sz %lu, spl: %lu", size(), spill_at()); + fil.push_back(std::move(value)); + } + else mem.push_back(std::move(value)); + } + inline void pop_back() override + { + if(fil.size()) fil.pop_back(); + else mem.pop_back(); + } + inline const T& back() const override + { + if(fil.size()) return fil.back(); + else return mem.back(); + } + inline T& back() override + { + if(fil.size()) return fil.back(); + else return mem.back(); + } + inline const std::size_t size() const override { return mem.size() + fil.size(); } + + inline const std::size_t spill_at() const { return 
_spill_at; } +private: + std::size_t _spill_at; + std::vector mem; + file_vector fil; +}; diff --git a/include/shuffle.hpp b/include/shuffle.hpp index 4833f24..3800031 100644 --- a/include/shuffle.hpp +++ b/include/shuffle.hpp @@ -23,15 +23,31 @@ namespace rng { std::cout << " OK" << std::endl; } + namespace { + extern "C" int _can_allocate(std::size_t bytes); + template + inline bool can_allocate(std::size_t len) + { + return !!_can_allocate(len*sizeof(T)); + } + } + template inline void unshuffle(R& rng, span span) { if(!span.size()) return; -#ifdef _FS_SPILL_BUFFER - fixed_spill_vector rng_values;//(span.size()); //TODO: dynamic_spill_vector +#if defined(_FS_SPILL_BUFFER) && _FS_SPILL_BUFFER == DYN + dynamic_spill_vector rng_values = + can_allocate(span.size()) //Is there any way we can not waste this malloc() when it's valid? + ? dynamic_spill_vector (span.size()) + : dynamic_spill_vector (FSV_DEFAULT_SPILL_AT); + +#elif defined(_FS_SPILL_BUFFER) + fixed_spill_vector rng_values; #else - std::vector rng_values(span.size()); + std::vector rng_values; + rng_values.reserve(span.size()); #endif std::cout << " -> unshuffling " << span.size() << " objects..."; diff --git a/include/shuffle3.h b/include/shuffle3.h index 0a927c5..886e1ae 100644 --- a/include/shuffle3.h +++ b/include/shuffle3.h @@ -21,6 +21,16 @@ extern "C" { /// When to spill a file-backed buffer onto the fs (only used when `_FS_SPILL_BUFFER` is enabled). #define FSV_DEFAULT_SPILL_AT ((1024 * 1024) * 10) //10MB +/* +#ifdef _FS_SPILL_BUFFER +#define DYN 1 +#if _FS_SPILL_BUFFER == DYN +#undef _FS_SPILL_BUFFER +#define _FS_SPILL_BUFFER DYN +#endif +#undef DYN +#endif +*/ //** Globals *// extern const char* _prog_name; diff --git a/src/mem.c b/src/mem.c new file mode 100644 index 0000000..6fa2305 --- /dev/null +++ b/src/mem.c @@ -0,0 +1,12 @@ +#include +#include +#include + +int _can_allocate(size_t bytes) +{ + void* shim = malloc(bytes); + if(shim) { + free(shim); + return 1; + } else return 0; +}