added _FS_SPILL_BUFFER=MAP

progress v2.2.0
Avril 4 years ago
parent 9270fe49ab
commit b882bfa8ea
Signed by: flanchan
GPG Key ID: 284488987C31F630

@ -8,6 +8,7 @@ PROJECT=shuffle3
# Currently supported:
# _FS_SPILL_BUFFER: Use file backed buffer instead of memory backed one for unshuffling. See `shuffle3.h`.
# Setting the value to `DYN` enables the dynamic buffer; setting it to `MAP` enables the memory-mapped buffer. `MAP` is usually the fastest of the 3 modes.
# DEBUG: Pretend we're building a debug release even though we're not. Will enable additional output messages and may interfere with some optimisations
FEATURE_FLAGS?=

@ -85,6 +85,7 @@ There are some build-time flags you can switch while building by appending to th
| `DEBUG` | Pretend we're building a debug release even though we're not. |
| `_FS_SPILL_BUFFER` | Spill buffers into a file if they grow over a threshold. Can cause massive slowdowns but prevent OOMs while unshuffling on systems with low available memory. See [shuffle3.h](./include/shuffle3.h) for more details |
| `_FS_SPILL_BUFFER=DYN` | Same as above except allocates memory dynamically. Might be faster. |
| `_FS_SPILL_BUFFER=MAP` | Same as above except it calls `fallocate()` and `mmap()` to provide a buffer of the full size needed. Is usually the fastest of the options for `_FS_SPILL_BUFFER` and is preferable if possible. |
## Gentoo ebuild

@ -6,9 +6,12 @@
#include <utility>
#include <vector>
#include <map.h>
#include <tempfile.hpp>
#include <panic.h>
#include <debug.h>
#include <shuffle3.h>
template<typename T>
@ -170,6 +173,72 @@ private:
file_vector<T> fil;
};
/// Fixed-capacity back-inserter whose element storage is a memory-mapped file.
///
/// The mapping is created once, at construction, with room for `sz` elements of `T`
/// (`sz * sizeof(T)` bytes) and never grows. `fill_ptr` is the index of the last live
/// element, or -1 when the vector is empty.
template<typename T>
struct mapped_vector : public i_back_inserter<T>, public i_shunt
{
	/// Create a vector with room for `sz` elements, backed by a newly named temporary file.
	///
	/// The returned vector owns the `temp_file`, so the backing file's lifetime follows
	/// the vector's (the `temp_file` destructor removes its path).
	inline static mapped_vector<T> from_temp(std::size_t sz)
	{
		D_dprintf("generating with %lu size", sz);
		// Allocate the temp_file on the heap up front: ownership is then handed over by
		// moving the pointer, so no second temp_file object ever exists whose destructor
		// could remove the file we have just mapped.
		auto file = std::make_unique<temp_file>();
		mapped_vector<T> mvec(file->full_path().c_str(), sz);
		D_dprintf("generated?");
		mvec.temp = std::move(file);
		return mvec;
	}

	/// Map `file` with room for `sz` elements of `T`. The file itself is not owned
	/// (no `temp_file` is attached), so it is not removed on destruction.
	inline mapped_vector(const char* file, std::size_t sz)
		: sz(sz),
		  temp(nullptr),
		  map(mm::mmap::allocate(file, sz * sizeof(T))){}

	inline mapped_vector(const mapped_vector<T>& c) = delete;

	/// Move-construct, leaving `m` empty *and* zero-capacity so that any later
	/// `push_back()` / `back()` on it panics instead of touching the moved-out mapping.
	inline mapped_vector(mapped_vector<T>&& m)
		: sz(std::exchange(m.sz, std::size_t{0})),
		  fill_ptr(std::exchange(m.fill_ptr, ssize_t{-1})),
		  temp(std::move(m.temp)),
		  map(std::move(m.map)){}

	/// Default construction is not supported and always panics.
	/// NOTE(review): the delegated constructor runs first and calls
	/// `mm::mmap::allocate(nullptr, 0)`; presumably that already panics before the
	/// "unsupported" message below is reached — confirm.
	inline mapped_vector() : mapped_vector(nullptr, 0)
	{
		panic("unsupported");
	}

	/// Append `value` by moving it into the next free slot; panics when the map is full.
	inline void push_back(T&& value) override
	{
		if(is_full()) panic("Tried to push past end of map");
		else memory()[++fill_ptr] = std::move(value);
	}

	/// Drop the last element, if any. The slot is not destroyed or cleared, only forgotten.
	inline void pop_back() override
	{
		if(fill_ptr>=0) fill_ptr-=1;
	}

	/// Last element; panics when empty.
	/// NOTE(review): relies on `panic()` not returning — confirm it is declared noreturn.
	inline T& back() override
	{
		if(fill_ptr>=0)
		{
			return memory()[fill_ptr];
		} else panic("back() called with no elements");
	}
	inline const T& back() const override
	{
		if(fill_ptr>=0)
		{
			return memory()[fill_ptr];
		} else panic("back() const called with no elements");
	}

	/// Number of live elements (`fill_ptr + 1`).
	inline const std::size_t size() const override { return ((std::size_t)fill_ptr)+1; }
	/// Total number of elements the mapping has room for.
	inline std::size_t cap() const { return sz; }
	/// True when no more elements fit. Compared as element counts so that a
	/// zero-capacity vector is full without depending on `sz-1` wrapping around.
	inline bool is_full() const { return static_cast<std::size_t>(fill_ptr + 1) >= sz; }
protected:
	/// The whole mapping viewed as a span of `T`.
	inline const span<T> memory() const { return map.as_span().reinterpret<T>(); }
	inline span<T> memory() { return map.as_span().reinterpret<T>(); }
private:
	// Declaration order matters: members are destroyed in reverse, so `map` is torn
	// down before `temp` removes the backing file.
	std::size_t sz;
	ssize_t fill_ptr=-1;
	std::unique_ptr<temp_file> temp;
	mm::mmap map;
};
template<typename T, typename Shunt>
requires(std::is_base_of<i_back_inserter<T>, Shunt >::value)
struct shunt : public i_back_inserter<T>, protected i_shunt
@ -177,7 +246,15 @@ struct shunt : public i_back_inserter<T>, protected i_shunt
typedef Shunt spill_type;
/// Default: use FSV_DEFAULT_SPILL_AT as the capacity (and, via `shunt(cap)`, as the spill threshold).
inline shunt() : shunt(FSV_DEFAULT_SPILL_AT){}
/// Use FSV_DEFAULT_SPILL_AT as the capacity and take `into` as the spill target.
inline shunt(spill_type&& into) : shunt(FSV_DEFAULT_SPILL_AT, std::move(into)){}
/// Capacity and spill threshold are both `cap`; no spill target is supplied.
inline shunt(std::size_t cap) : shunt(cap, cap){}
/// Capacity and spill threshold are both `cap`; `into` is taken as the spill target.
inline shunt(std::size_t cap, spill_type&& into) : shunt(cap, cap, std::move(into)){}
/// Construct with an explicit spill target.
///
/// `spill` is stored as the spill threshold, `mem` starts as an empty vector with
/// `cap` elements reserved, and `into` is moved into a heap-allocated `spill_type`
/// owned by `fil`.
inline shunt(std::size_t cap, std::size_t spill, spill_type&& into)
: _spill_at(spill), mem(std::vector<T>()), fil(std::make_unique<spill_type>(std::move(into))) {
// Reserve only: size stays 0, which the debug line below prints as a sanity check.
mem.reserve(cap);
D_dprintf("alloc (explicit) cap %lu (sz %lu == 0?), spill %lu", cap, mem.size(), spill_at());
}
inline shunt(std::size_t cap, std::size_t spill) : _spill_at(spill), mem(std::vector<T>()), fil(nullptr) {
mem.reserve(cap);
D_dprintf("alloc cap %lu (sz %lu == 0?), spill %lu", cap, mem.size(), spill_at());
@ -242,3 +319,6 @@ private:
/// `shunt` whose spill target is a `file_vector<T>`.
template<typename T>
using dynamic_spill_vector = shunt<T, file_vector<T> >;
/// `shunt` whose spill target is a `mapped_vector<T>`.
template<typename T>
using mapped_spill_vector = shunt<T, mapped_vector<T> >;

@ -15,6 +15,7 @@ typedef struct mmap {
size_t len;
} mmap_t;
int open_and_alloc(const char* file, mmap_t* restrict ptr, size_t sz);
int open_and_map(const char* file, mmap_t* restrict ptr);
int unmap_and_close(mmap_t map);
@ -28,6 +29,12 @@ void* map_and_then(const char* file, map_cb callback, void* user);
#include <cstdint>
namespace mm {
struct mmap {
/// Open `file` through `open_and_alloc()` with a requested length of `sz` bytes and
/// wrap the resulting raw mapping. Panics if `open_and_alloc()` reports failure (returns 0).
inline static mmap allocate(const char* file, std::size_t sz)
{
	mmap_t raw;
	const bool mapped = open_and_alloc(file, &raw, sz) != 0;
	if(!mapped) {
		panic("Failed to allocmap file");
	}
	return mmap(raw);
}
inline static mmap_t create_raw(const char* file)
{
mmap_t map;
@ -35,7 +42,7 @@ namespace mm {
return map;
}
inline mmap(mmap_t raw) :inner(raw){}
inline explicit mmap(mmap_t raw) :inner(raw){}
inline mmap(const char* file)
: inner(create_raw(file)) {}

@ -37,18 +37,31 @@ namespace rng {
{
if(!span.size()) return;
#if defined(_FS_SPILL_BUFFER) && _FS_SPILL_BUFFER == DYN
#define DYN 2
#define MAP 3
#if defined(_FS_SPILL_BUFFER) && (_FS_SPILL_BUFFER == DYN)
D_dprintf("spill=dyn");
dynamic_spill_vector<std::size_t> rng_values =
can_allocate<std::size_t>(span.size()) //Is there any way we can not waste this malloc() when it's valid?
? dynamic_spill_vector<std::size_t> (span.size())
//can_allocate<std::size_t>(span.size()) //Is there any way we can not waste this malloc() when it's valid?
span.size() <= FSV_DEFAULT_SPILL_AT
? dynamic_spill_vector<std::size_t> (span.size(), FSV_DEFAULT_SPILL_AT)
: dynamic_spill_vector<std::size_t> (FSV_DEFAULT_SPILL_AT);
#elif defined(_FS_SPILL_BUFFER) && (_FS_SPILL_BUFFER == MAP)
D_dprintf("spill=map");
mapped_spill_vector<std::size_t> rng_values =
span.size() <= FSV_DEFAULT_SPILL_AT
? mapped_spill_vector<std::size_t> (span.size(), FSV_DEFAULT_SPILL_AT)
: mapped_spill_vector<std::size_t> (FSV_DEFAULT_SPILL_AT, mapped_vector<std::size_t>::from_temp(span.size() - FSV_DEFAULT_SPILL_AT));
#elif defined(_FS_SPILL_BUFFER)
D_dprintf("spill=static");
fixed_spill_vector<std::size_t> rng_values;
#else
D_dprintf("spill=none");
std::vector<std::size_t> rng_values;
rng_values.reserve(span.size());
#endif
#undef MAP
#undef DYN
std::cout << " -> unshuffling " << span.size() << " objects...";
for(std::size_t i=span.size()-1;i>0;i--)

@ -0,0 +1,49 @@
#pragma once
#include <filesystem>
#include <string>
#include <system_error>
#include <utility>
#include "uuid.hpp"
#include <debug.h>
using std::size_t;
namespace fs = std::filesystem;
/// A temporary file name
///
/// Holds a base name and lazily resolves it to an absolute path inside the system
/// temporary directory. If that path has been resolved and exists when the object is
/// destroyed, the file is removed. The object never creates the file itself.
struct temp_file
{
	inline temp_file(const temp_file& c) = delete;
	/// Transfer the name and resolved path out of `m`.
	inline temp_file(temp_file&& m) noexcept : name(std::move(m.name)), _full_path(std::move(m._full_path))
	{
		// A moved-from string/path is only guaranteed to be valid, not empty. Clear both
		// explicitly so `m`'s destructor can never remove the file we now own.
		m.name.clear();
		m._full_path.clear();
	}
	/// Generate a fresh name of the form `<uuid>-s3`.
	inline temp_file() : name(uuid::generate().to_string()+"-s3"){}
	inline temp_file(const char* name) : name(name) {}
	// `name` is an lvalue inside the constructor; move it so the buffer is not copied.
	inline temp_file(std::string&& name) : name(std::move(name)) {}

	inline ~temp_file()
	{
		// Moved-from (or otherwise empty) instance: nothing to clean up.
		if(name.empty() && _full_path.empty()) return;
		D_dprintf("~tempfile(): %s", _full_path.c_str());
		// Use the non-throwing overloads: an exception escaping a destructor would
		// terminate the program. Removal stays best-effort.
		std::error_code ec;
		if(!_full_path.empty() && fs::exists(_full_path, ec) ) {
			D_dprintf("tfile removing: %s", _full_path.c_str());
			fs::remove(_full_path, ec);
		}
	}

	/// Absolute path of the file, resolved against `fs::temp_directory_path()` on first
	/// use and cached afterwards (hence the `mutable` member).
	inline const fs::path& full_path() const
	{
		if(_full_path.empty()) {
			_full_path = fs::absolute( fs::temp_directory_path() / name );
			D_dprintf("tfile path: %s", _full_path.c_str());
		}
		return _full_path;
	}
	/// The bare file name, without any directory component added by `full_path()`.
	inline const std::string& base_name() const { return name; }
	/// Convenience access to the members of `full_path()`.
	inline const fs::path* operator->() const { return &full_path(); }
private:
	std::string name;
	mutable fs::path _full_path;
};

@ -5,43 +5,10 @@
#include <fsvec.h>
#include <debug.h>
#define FB file_back_buffer
using std::size_t;
namespace fs = std::filesystem;
/// A temporary file name
struct temp_file
{
inline temp_file(const temp_file& c) = delete;
inline temp_file(temp_file&& m) : name(std::move(m.name)) { m._full_path.clear(); }
inline temp_file() : name(uuid::generate().to_string()+"-s3"){}
inline temp_file(const char* name) : name(name) {}
inline temp_file(std::string&& name) : name(name) {}
#include <tempfile.hpp>
inline ~temp_file()
{
if(!_full_path.empty() && fs::exists(_full_path) ) {
D_dprintf("tfile removing: %s", _full_path.c_str());
fs::remove(_full_path);
}
}
inline const fs::path& full_path() const
{
if(_full_path.empty()) {
_full_path = fs::absolute( fs::temp_directory_path() / name );
D_dprintf("tfile path: %s", _full_path.c_str());
}
return _full_path;
}
inline const std::string& base_name() const { return name; }
#define FB file_back_buffer
inline const fs::path* operator->() const { return &full_path(); }
private:
std::string name;
mutable fs::path _full_path;
};
struct FB::impl
{

@ -40,6 +40,35 @@ int open_and_map(const char* file, mmap_t* restrict ptr)
return 1;
}
/* Open `file` read/write (creating it with FILEMODE if needed), reserve `sz` bytes
 * for it with fallocate(), and map those bytes shared and read/write.
 *
 * On success the descriptor, base address and length are stored in `*ptr` and 1 is
 * returned. On any failure a message is printed with perror(), the descriptor (if
 * one was opened) is closed, `*ptr` is left untouched and 0 is returned. */
int open_and_alloc(const char* file, mmap_t* restrict ptr, size_t sz)
{
	const int handle = open(file, O_CREAT | O_RDWR, FILEMODE);
	if (handle < 0) {
		perror("Failed to open file");
		return 0;
	}

	if (fallocate(handle, 0, 0, sz) != 0) {
		perror("Failed to allocate");
		close(handle);
		return 0;
	}

	void* const base = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, handle, 0);
	if (base == MAP_FAILED) {
		/* Report before close() so errno still belongs to mmap(). */
		perror("mmap() failed");
		close(handle);
		return 0;
	}

	const struct mmap result = { .fd = handle, .ptr = base, .len = sz };
	*ptr = result;
	return 1;
}
int unmap_and_close(mmap_t map)
{
register int rval=1;

Loading…
Cancel
Save