From b882bfa8ea3ff25e186a804cd9c00e2623ef9999 Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 2 Dec 2020 20:05:09 +0000 Subject: [PATCH] added _FS_SPILL_BUFFER=MAP --- Makefile | 1 + README.md | 1 + include/fsvec.hpp | 80 ++++++++++++++++++++++++++++++++++++++++++++ include/map.h | 9 ++++- include/shuffle.hpp | 21 +++++++++--- include/tempfile.hpp | 49 +++++++++++++++++++++++++++ src/fsvec.cpp | 37 ++------------------ src/map.c | 29 ++++++++++++++++ 8 files changed, 187 insertions(+), 40 deletions(-) create mode 100644 include/tempfile.hpp diff --git a/Makefile b/Makefile index 9ba7bdc..2f3fc6c 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ PROJECT=shuffle3 # Currently supported: # _FS_SPILL_BUFFER: Use file backed buffer instead of memory backed one for unshuffling. See `shuffle3.h`. +# Setting the value to `DYN` enables the dynamic buffer, setting it to `MAP` enables the memory-mapped buffer. `MAP` is usually the fastest of the 3 modes. # DEBUG: Pretend we're building a debug release even though we're not. Will enable additional output messages and may interfere with some optimisations FEATURE_FLAGS?= diff --git a/README.md b/README.md index b95032e..4387ade 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ There are some build-time flags you can switch while building by appending to th | `DEBUG` | Pretend we're building a debug release even though we're not. | | `_FS_SPILL_BUFFER` | Spill buffers into a file if they grow over a threshold. Can cause massive slowdowns but prevent OOMs while unshuffling on systems with low available memory. See [shuffle3.h](./include/shuffle3.h) for more details | | `_FS_SPILL_BUFFER=DYN` | Same as above except allocates memory dynamically. Might be faster. | +| `_FS_SPILL_BUFFER=MAP` | Same as above except it calls `fallocate()` and `mmap()` to provide a buffer of the full size needed. Is usually the fastest of the options for `_FS_SPILL_BUFFER` and is preferable if possible.
| ## Gentoo ebuild diff --git a/include/fsvec.hpp b/include/fsvec.hpp index 3fd79dd..033eabc 100644 --- a/include/fsvec.hpp +++ b/include/fsvec.hpp @@ -6,9 +6,12 @@ #include #include +#include +#include #include #include + #include template @@ -170,6 +173,72 @@ private: file_vector fil; }; +template +struct mapped_vector : public i_back_inserter, public i_shunt +{ + inline static mapped_vector from_temp(std::size_t sz) + { + D_dprintf("generating with %lu size", sz); + temp_file file; + mapped_vector mvec(file.full_path().c_str(), sz); + D_dprintf("generated?"); + mvec.temp = std::make_unique(std::move(file)); + return mvec; + } + inline mapped_vector(const char* file, std::size_t sz) + : sz(sz), + temp(nullptr), + map(mm::mmap::allocate(file, sz * sizeof(T))){} + inline mapped_vector(const mapped_vector& c) = delete; + inline mapped_vector(mapped_vector&& m) + : sz(m.sz), + fill_ptr(m.fill_ptr), + temp(std::move(m.temp)), + map(std::move(m.map)){} + inline mapped_vector() : mapped_vector(nullptr, 0) + { + panic("unsupported"); + } + + inline void push_back(T&& value) override + { + if(is_full()) panic("Tried to push past end of map"); + else memory()[++fill_ptr] = value; + } + inline void pop_back() override + { + if(fill_ptr>=0) fill_ptr-=1; + } + + inline T& back() override + { + if(fill_ptr>=0) + { + return memory()[fill_ptr]; + } else panic("back() called with no elements"); + } + inline const T& back() const override + { + if(fill_ptr>=0) + { + return memory()[fill_ptr]; + } else panic("back() const called with no elements"); + } + inline const std::size_t size() const override { return ((std::size_t)fill_ptr)+1; } + inline std::size_t cap() const { return sz; } + + inline bool is_full() const { return fill_ptr >= (ssize_t)(sz-1); } +protected: + inline const span memory() const { return map.as_span().reinterpret(); } + inline span memory() { return map.as_span().reinterpret(); } +private: + std::size_t sz; + ssize_t fill_ptr=-1; + + std::unique_ptr temp; + 
mm::mmap map; +}; + template requires(std::is_base_of, Shunt >::value) struct shunt : public i_back_inserter, protected i_shunt @@ -177,7 +246,15 @@ struct shunt : public i_back_inserter, protected i_shunt typedef Shunt spill_type; inline shunt() : shunt(FSV_DEFAULT_SPILL_AT){} + inline shunt(spill_type&& into) : shunt(FSV_DEFAULT_SPILL_AT, std::move(into)){} inline shunt(std::size_t cap) : shunt(cap, cap){} + inline shunt(std::size_t cap, spill_type&& into) : shunt(cap, cap, std::move(into)){} + inline shunt(std::size_t cap, std::size_t spill, spill_type&& into) + : _spill_at(spill), mem(std::vector()), fil(std::make_unique(std::move(into))) { + mem.reserve(cap); + + D_dprintf("alloc (explicit) cap %lu (sz %lu == 0?), spill %lu", cap, mem.size(), spill_at()); + } inline shunt(std::size_t cap, std::size_t spill) : _spill_at(spill), mem(std::vector()), fil(nullptr) { mem.reserve(cap); D_dprintf("alloc cap %lu (sz %lu == 0?), spill %lu", cap, mem.size(), spill_at()); @@ -242,3 +319,6 @@ private: template using dynamic_spill_vector = shunt >; + +template +using mapped_spill_vector = shunt >; diff --git a/include/map.h b/include/map.h index 307481a..e65ade3 100644 --- a/include/map.h +++ b/include/map.h @@ -15,6 +15,7 @@ typedef struct mmap { size_t len; } mmap_t; +int open_and_alloc(const char* file, mmap_t* restrict ptr, size_t sz); int open_and_map(const char* file, mmap_t* restrict ptr); int unmap_and_close(mmap_t map); @@ -28,6 +29,12 @@ void* map_and_then(const char* file, map_cb callback, void* user); #include namespace mm { struct mmap { + inline static mmap allocate(const char* file, std::size_t sz) + { + mmap_t map; + if(!open_and_alloc(file, &map, sz)) panic("Failed to allocmap file"); + return mmap(map); + } inline static mmap_t create_raw(const char* file) { mmap_t map; @@ -35,7 +42,7 @@ namespace mm { return map; } - inline mmap(mmap_t raw) :inner(raw){} + inline explicit mmap(mmap_t raw) :inner(raw){} inline mmap(const char* file) : 
inner(create_raw(file)) {} diff --git a/include/shuffle.hpp b/include/shuffle.hpp index 3800031..4686ed5 100644 --- a/include/shuffle.hpp +++ b/include/shuffle.hpp @@ -37,18 +37,31 @@ namespace rng { { if(!span.size()) return; -#if defined(_FS_SPILL_BUFFER) && _FS_SPILL_BUFFER == DYN +#define DYN 2 +#define MAP 3 +#if defined(_FS_SPILL_BUFFER) && (_FS_SPILL_BUFFER == DYN) + D_dprintf("spill=dyn"); dynamic_spill_vector rng_values = - can_allocate(span.size()) //Is there any way we can not waste this malloc() when it's valid? - ? dynamic_spill_vector (span.size()) + //can_allocate(span.size()) //Is there any way we can not waste this malloc() when it's valid? + span.size() <= FSV_DEFAULT_SPILL_AT + ? dynamic_spill_vector (span.size(), FSV_DEFAULT_SPILL_AT) : dynamic_spill_vector (FSV_DEFAULT_SPILL_AT); - +#elif defined(_FS_SPILL_BUFFER) && (_FS_SPILL_BUFFER == MAP) + D_dprintf("spill=map"); + mapped_spill_vector rng_values = + span.size() <= FSV_DEFAULT_SPILL_AT + ? mapped_spill_vector (span.size(), FSV_DEFAULT_SPILL_AT) + : mapped_spill_vector (FSV_DEFAULT_SPILL_AT, mapped_vector::from_temp(span.size() - FSV_DEFAULT_SPILL_AT)); #elif defined(_FS_SPILL_BUFFER) + D_dprintf("spill=static"); fixed_spill_vector rng_values; #else + D_dprintf("spill=none"); std::vector rng_values; rng_values.reserve(span.size()); #endif +#undef MAP +#undef DYN std::cout << " -> unshuffling " << span.size() << " objects..."; for(std::size_t i=span.size()-1;i>0;i--) diff --git a/include/tempfile.hpp b/include/tempfile.hpp new file mode 100644 index 0000000..0c69199 --- /dev/null +++ b/include/tempfile.hpp @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include "uuid.hpp" + +#include + +using std::size_t; +namespace fs = std::filesystem; + +/// A temporary file name +struct temp_file +{ + inline temp_file(const temp_file& c) = delete; + + inline temp_file(temp_file&& m) : name(std::move(m.name)), _full_path(std::move(m._full_path)) {} + inline temp_file() : 
name(uuid::generate().to_string()+"-s3"){} + inline temp_file(const char* name) : name(name) {} + inline temp_file(std::string&& name) : name(name) {} + + inline ~temp_file() + { + if(name.empty() && _full_path.empty()) return; + + D_dprintf("~tempfile(): %s", _full_path.c_str()); + + if(!_full_path.empty() && fs::exists(_full_path) ) { + D_dprintf("tfile removing: %s", _full_path.c_str()); + fs::remove(_full_path); + } + } + + inline const fs::path& full_path() const + { + if(_full_path.empty()) { + _full_path = fs::absolute( fs::temp_directory_path() / name ); + D_dprintf("tfile path: %s", _full_path.c_str()); + } + return _full_path; + } + inline const std::string& base_name() const { return name; } + + inline const fs::path* operator->() const { return &full_path(); } +private: + std::string name; + mutable fs::path _full_path; +}; diff --git a/src/fsvec.cpp b/src/fsvec.cpp index 16dedec..a6cb94a 100644 --- a/src/fsvec.cpp +++ b/src/fsvec.cpp @@ -5,43 +5,10 @@ #include #include -#define FB file_back_buffer -using std::size_t; -namespace fs = std::filesystem; - -/// A temporary file name -struct temp_file -{ - inline temp_file(const temp_file& c) = delete; +#include - inline temp_file(temp_file&& m) : name(std::move(m.name)) { m._full_path.clear(); } - inline temp_file() : name(uuid::generate().to_string()+"-s3"){} - inline temp_file(const char* name) : name(name) {} - inline temp_file(std::string&& name) : name(name) {} - - inline ~temp_file() - { - if(!_full_path.empty() && fs::exists(_full_path) ) { - D_dprintf("tfile removing: %s", _full_path.c_str()); - fs::remove(_full_path); - } - } - - inline const fs::path& full_path() const - { - if(_full_path.empty()) { - _full_path = fs::absolute( fs::temp_directory_path() / name ); - D_dprintf("tfile path: %s", _full_path.c_str()); - } - return _full_path; - } - inline const std::string& base_name() const { return name; } +#define FB file_back_buffer - inline const fs::path* operator->() const { return &full_path(); 
} -private: - std::string name; - mutable fs::path _full_path; -}; struct FB::impl { diff --git a/src/map.c b/src/map.c index 2a245af..60e5011 100644 --- a/src/map.c +++ b/src/map.c @@ -40,6 +40,35 @@ int open_and_map(const char* file, mmap_t* restrict ptr) return 1; } +int open_and_alloc(const char* file, mmap_t* restrict ptr, size_t sz) +{ + int fd; + if ((fd = open(file, O_CREAT | O_RDWR, FILEMODE)) < 0) { + perror("Failed to open file"); + return 0; + } + + if(fallocate(fd, 0, 0, sz)) + { + perror("Failed to allocate"); + close(fd); + return 0; + } + + register struct mmap map = { .fd = fd, .ptr = NULL, .len = sz }; + + if ((map.ptr = mmap(NULL, map.len, PROT_READ | PROT_WRITE, MAP_SHARED,fd, 0)) == MAP_FAILED) { + perror("mmap() failed"); + close(fd); + return 0; + } + + *ptr = map; + + return 1; +} + + int unmap_and_close(mmap_t map) { register int rval=1;