diff --git a/include/fsvec.hpp b/include/fsvec.hpp index c2a603f..38be0d4 100644 --- a/include/fsvec.hpp +++ b/include/fsvec.hpp @@ -15,7 +15,7 @@ struct file_back_buffer file_back_buffer(); file_back_buffer(std::size_t cap); file_back_buffer(const file_back_buffer& c) = delete; - inline file_back_buffer(file_back_buffer&& m) : inner(std::move(m.inner)){} + file_back_buffer(file_back_buffer&& m); void push_buf(byte* buf, std::size_t len); bool back(byte* buf, std::size_t len) const; @@ -46,7 +46,7 @@ template struct file_vector { inline file_vector() : file_vector(file_back_buffer::DEFAULT_CAP){} - inline file_vector(std::size_t cap) : inserter(file_back_buffer(cap)), len(0), current_back(std::vector(sizeof(T))) {current_back.resize(sizeof(T));} + inline file_vector(std::size_t cap) : inserter(file_back_buffer(cap)), len(0), current_back(std::vector(sizeof(T))) {current_back.resize(sizeof(T));} inline file_vector(const file_vector& c) = delete; inline file_vector(file_vector&& m) : inserter(std::move(m.inserter)), len(m.len), current_back(std::move(m.current_back)){} @@ -74,8 +74,10 @@ struct file_vector if(!inserter.pop_n(sizeof(T))) panic("pop_back(): 0 elements"); len-=1; } + + inline const std::size_t size() const { return len; } private: - mutable std::vector current_back; // what an awful hack... file_back_buffer inserter; - std::uint64_t len; + std::size_t len; + mutable std::vector current_back; // what an awful hack... }; diff --git a/include/shuffle.hpp b/include/shuffle.hpp index b59e434..7486bef 100644 --- a/include/shuffle.hpp +++ b/include/shuffle.hpp @@ -5,6 +5,10 @@ #include #include +#include + +#include + namespace rng { template inline void shuffle(R& rng, span span) @@ -23,7 +27,12 @@ namespace rng { inline void unshuffle(R& rng, span span) { if(!span.size()) return; + +#ifdef _FS_SPILL_BUFFER + file_vector rng_values(span.size()); +#else std::vector rng_values(span.size()); +#endif std::cout << " -> unshuffling " << span.size() << " objects..."; for(std::size_t i=span.size()-1;i>0;i--) diff --git a/include/shuffle3.h b/include/shuffle3.h index 150bed0..af16299 100644 --- a/include/shuffle3.h +++ b/include/shuffle3.h @@ -14,6 +14,10 @@ extern "C" { #define _FORCE_INLINE __attribute__((gnu_inline)) extern inline #endif +//** Features **// + +//#define _FS_SPILL_BUFFER /* Use a file-backed buffer when unshuffling in cases of too high memory usage. Will cause massive slowdowns but can stop OOMs when unshuffling large files */ + extern const char* _prog_name; #ifdef __cplusplus diff --git a/src/fsvec.c b/src/fsvec.c index ed4a4bc..21d4e38 100644 --- a/src/fsvec.c +++ b/src/fsvec.c @@ -4,7 +4,7 @@ int fvec_new(fvec_t* restrict obj, const char* path) { - obj->backing = fopen(path, "wb"); + obj->backing = fopen(path, "w+b"); if(!obj->backing) { perror("fvec_new(): Failed to open file"); return 0; @@ -60,7 +60,7 @@ int fvec_get_whole_buffer(const fvec_t* restrict obj, void* _buffer, size_t _sz) while ( w<_sz && (c=fread(buffer+w, 1, sz-w, obj->backing))>0 ) w+=c; if (w!=_sz) { perror("Corrupted buffer state, aborting"); - panic("Cannot continue"); + panic("Cannot continue on FD %d (%p)", fileno(obj->backing), obj->backing); } return 1; diff --git a/src/fsvec.cpp b/src/fsvec.cpp index 365fd97..3aa196c 100644 --- a/src/fsvec.cpp +++ b/src/fsvec.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #define FB file_back_buffer using std::size_t; @@ -13,17 +14,25 @@ struct temp_file { inline temp_file(const temp_file& c) = delete; - inline temp_file(temp_file&& m) : name(std::move(m.name)) {} - inline temp_file() : name(uuid::generate().to_string()){} + inline temp_file(temp_file&& m) : name(std::move(m.name)) { m._full_path.clear(); } + inline temp_file() : name(uuid::generate().to_string()+"-s3"){} inline temp_file(const char* name) : name(name) {} inline temp_file(std::string&& name) : name(name) {} - inline ~temp_file() = default; + inline ~temp_file() + { + if(!_full_path.empty() && fs::exists(_full_path) ) { + D_dprintf("tfile removing: %s", _full_path.c_str()); + fs::remove(_full_path); + } + } inline const fs::path& full_path() const { - if(_full_path.empty()) - _full_path = fs::canonical( fs::temp_directory_path() / name ); + if(_full_path.empty()) { + _full_path = fs::absolute( fs::temp_directory_path() / name ); + D_dprintf("tfile path: %s", _full_path.c_str()); + } return _full_path; } inline const std::string& base_name() const { return name; } @@ -40,11 +49,6 @@ struct FB::impl temp_file file; fvec_t backing; - - inline ~impl() - { - fvec_close(&backing); - } }; FB::FB(size_t cap) : inner(std::make_unique()) @@ -56,6 +60,10 @@ FB::FB(size_t cap) : inner(std::make_unique()) } FB::FB() : FB(DEFAULT_CAP){} +FB::~FB() +{ + fvec_close(&inner->backing); +} void FB::push_buf(byte* buf, size_t len) { @@ -71,3 +79,27 @@ bool FB::pop_n(size_t len) { return !!fvec_pop_end(&inner->backing, len); } + +extern "C" void _fb_run_tests() +{ + file_vector test; + int r0,r1=0; + for(int i=0;i<10;i++) { + D_dprintf("push: %d", (10-i)); + test.push_back(10-i); + r1+= (10-i); + } + D_dprintf("r1: %d", r1); + r0=0; + while(test.size()) + { + r0+=test.back(); + D_dprintf("back: %d", test.back()); + test.pop_back(); + } + D_dprintf("r0: %d", r0); + + if(r0!=r1) panic("fb failed test: %d, %d", r0, r1); + + D_dprintf("test successful"); +} diff --git a/src/main.c b/src/main.c index 2115112..d04f605 100644 --- a/src/main.c +++ b/src/main.c @@ -19,6 +19,8 @@ _Static_assert(sizeof(float)==sizeof(uint32_t), "float is not 32 bits"); const char* _prog_name; +extern void _fb_run_tests(); + noreturn help_then_exit() { fprintf(stderr, "Try passing `-h`\n"); @@ -41,6 +43,7 @@ int main(int argc, char** argv) { _prog_name = argv[0]; + //_fb_run_tests(); work_args_t parsed; if( !argv[1] || *(argv[1]) != '-') help_then_exit();