diff --git a/.gitignore b/.gitignore index 58b21ee..3fa48f6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ obj build/ test/ +shuffle3-* +profiling/ diff --git a/Makefile b/Makefile index cf9841e..343642d 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,8 @@ RELEASE_LDFLAGS?= -O3 -flto DEBUG_CFLAGS?= -g -O0 DEBUG_LDFLAGS?= -O0 +STRIP=strip + OBJ = $(addprefix obj/,$(SRC:.c=.o)) .PHONY: release @@ -36,7 +38,7 @@ $(BUILD)/$(PROJECT)-release: CFLAGS+= $(RELEASE_CFLAGS) $(BUILD)/$(PROJECT)-release: LDFLAGS+= $(RELEASE_LDFLAGS) $(BUILD)/$(PROJECT)-release: $(OBJ) $(CC) $^ $(CFLAGS) -o $@ $(LDFLAGS) - strip $@ + $(STRIP) $@ $(BUILD)/$(PROJECT)-debug: CFLAGS+= $(DEBUG_CFLAGS) $(BUILD)/$(PROJECT)-debug: LDFLAGS+= $(DEBUG_LDFLAGS) diff --git a/TODO b/TODO new file mode 100644 index 0000000..51b0f0e --- /dev/null +++ b/TODO @@ -0,0 +1 @@ +Complete rewrite using `mmap` et al. Maybe in Rust or C++ idk diff --git a/lean/Makefile b/lean/Makefile new file mode 100644 index 0000000..5a65e66 --- /dev/null +++ b/lean/Makefile @@ -0,0 +1,73 @@ +SRC_C = $(wildcard src/*.c) +SRC_CXX = $(wildcard src/*.cpp) +SRC_CXX+= $(wildcard src/rng/*.cpp) + +INCLUDE = include + +PROJECT=shuffle3 + +COMMON_FLAGS = -Wall -pedantic $(addprefix -I,$(INCLUDE)) -fno-strict-aliasing + +OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \ + -floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block \ + -fno-stack-check + +CXX_OPT_FLAGS?= $(OPT_FLAGS) -felide-constructors + +CFLAGS += $(COMMON_FLAGS) --std=gnu11 +CXXFLAGS += $(COMMON_FLAGS) --std=gnu++20 -fno-exceptions +LDFLAGS += -lfmt + +STRIP=strip + +RELEASE_CFLAGS?= -O3 -flto $(OPT_FLAGS) +RELEASE_CXXFLAGS?= -O3 -flto $(CXX_OPT_FLAGS) +RELEASE_LDFLAGS?= -O3 -flto + +DEBUG_CFLAGS?= -O0 -g -DDEBUG +DEBUG_CXXFLAGS?= $(DEBUG_CFLAGS) +DEBUG_LDFLAGS?= + +# Objects + +OBJ_C = $(addprefix obj/c/,$(SRC_C:.c=.o)) +OBJ_CXX = $(addprefix obj/cxx/,$(SRC_CXX:.cpp=.o)) +OBJ = $(OBJ_C) $(OBJ_CXX) 
+ +# Phonies + +.PHONY: release +release: | dirs $(PROJECT)-release + +.PHONY: debug +debug: | dirs $(PROJECT)-debug + +# Targets + +dirs: + @mkdir -p obj/c/src obj/cxx/src # listed explicitly: /bin/sh (make's default shell) need not support brace expansion + @mkdir -p obj/cxx/src/rng + +obj/c/%.o: %.c + $(CC) -c $< $(CFLAGS) -o $@ $(LDFLAGS) + +obj/cxx/%.o: %.cpp + $(CXX) -c $< $(CXXFLAGS) -o $@ $(LDFLAGS) + + +$(PROJECT)-release: CFLAGS+= $(RELEASE_CFLAGS) +$(PROJECT)-release: CXXFLAGS += $(RELEASE_CXXFLAGS) +$(PROJECT)-release: LDFLAGS += $(RELEASE_LDFLAGS) +$(PROJECT)-release: $(OBJ) + $(CXX) $^ $(CXXFLAGS) -o $@ $(LDFLAGS) + $(STRIP) $@ + +$(PROJECT)-debug: CFLAGS+= $(DEBUG_CFLAGS) +$(PROJECT)-debug: CXXFLAGS += $(DEBUG_CXXFLAGS) +$(PROJECT)-debug: LDFLAGS += $(DEBUG_LDFLAGS) +$(PROJECT)-debug: $(OBJ) + $(CXX) $^ $(CXXFLAGS) -o $@ $(LDFLAGS) + +clean: + rm -rf obj + rm -f $(PROJECT)-release $(PROJECT)-debug diff --git a/lean/TODO.org b/lean/TODO.org new file mode 100644 index 0000000..ad35f41 --- /dev/null +++ b/lean/TODO.org @@ -0,0 +1,35 @@ +* shuffle3-lean + Redesign/upgrade of =shuffle3= + +* Goals + - [X] Functioning in-place shuffle/unshuffle + - [X] Shuffle + - [X] Unshuffle + - [X] Usable in-place s/us from command line + - [X] Shuffle + - [X] Unshuffle + - [ ] Functioning out-of-place/in-memory shuffle/unshuffle + - [ ] Shuffle + - [ ] Unshuffle + - [ ] Usable out-of-place s/us from command line + - [ ] Shuffle + - [ ] Unshuffle + +** NO compatibility with =shuffle3= + =shuffle3='s ~drng~ PRNG algorithm uses an outdated global state backend. We don't want to reuse this. + As a result, output from =shuffle3= and =shuffle3-lean= is different. 
+ +* Improvements + - *~70-80x* speedup from shuffle3 1.0 + - Huge reduction in syscalls + - Takes advantage of the kernel's fs cache + - Can properly handle large files without core dumping + - Doesn't dump huge amounts of trash onto each stack frame + +* Todo + - [X] impl rng + - [X] impl shuffling + - [ ] impl out-of-place shuffling + - [-] arg parsing and dispatch + - [X] simple parsing + - [ ] complex parsing diff --git a/lean/include/colours.h b/lean/include/colours.h new file mode 100644 index 0000000..604635e --- /dev/null +++ b/lean/include/colours.h @@ -0,0 +1,28 @@ +// https://stackoverflow.com/a/30304782 + +#ifndef _COLORS_ +#define _COLORS_ + +/* FOREGROUND */ +#define RST "\x1B[0m" +#define KRED "\x1B[31m" +#define KGRN "\x1B[32m" +#define KYEL "\x1B[33m" +#define KBLU "\x1B[34m" +#define KMAG "\x1B[35m" +#define KCYN "\x1B[36m" +#define KWHT "\x1B[37m" + +#define FRED(x) KRED x RST +#define FGRN(x) KGRN x RST +#define FYEL(x) KYEL x RST +#define FBLU(x) KBLU x RST +#define FMAG(x) KMAG x RST +#define FCYN(x) KCYN x RST +#define FWHT(x) KWHT x RST + +#define BOLD(x) "\x1B[1m" x RST +#define UNDL(x) "\x1B[4m" x RST + +#endif /* _COLORS_ */ + diff --git a/lean/include/debug.h b/lean/include/debug.h new file mode 100644 index 0000000..334e6d1 --- /dev/null +++ b/lean/include/debug.h @@ -0,0 +1,43 @@ +#ifndef _DEBUG_H +#define _DEBUG_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct debuginfo { + const char* file; + const char* function; + int line; +}; + +void _do_dprintf(struct debuginfo di, const char* fmt, ...); + +#ifdef __cplusplus +extern "C++" { +#include + template + inline void _real_dprintf(const char* file, const char* function, int line, const char* fmt, Args&&... args) + { +#ifdef DEBUG + debuginfo i = { file, function, line }; + _do_dprintf(i, fmt, std::forward(args)...); +#endif + } +#define D_dprintf(fmt, ...) 
_real_dprintf(__FILE__, __func__, __LINE__, fmt __VA_OPT__(,) __VA_ARGS__) +} +#else + +#ifdef DEBUG +#define D_dprintf(fmt, ...) _do_dprintf( (struct debuginfo){.file = __FILE__, .function = __func__, .line = __LINE__}, fmt __VA_OPT__(,) __VA_ARGS__) +#else +static inline void _do__nothing(const char* fmt, ...) {} +#define D_dprintf(fmt, ...) _do__nothing(fmt __VA_OPT__(,) __VA_ARGS__) //(fmt __VA_OPT__(,) __VA_ARGS__, (void)0) +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _DEBUG_H */ diff --git a/lean/include/map.h b/lean/include/map.h new file mode 100644 index 0000000..307481a --- /dev/null +++ b/lean/include/map.h @@ -0,0 +1,74 @@ +#ifndef _MAP_H +#define _MAP_H + +#ifdef __cplusplus +extern "C" { +#define restrict __restrict__ +#endif + +#include + +typedef struct mmap { + int fd; + + void* ptr; + size_t len; +} mmap_t; + +int open_and_map(const char* file, mmap_t* restrict ptr); +int unmap_and_close(mmap_t map); + +typedef void* (*map_cb)(mmap_t map, void* user); +void* map_and_then(const char* file, map_cb callback, void* user); + +#ifdef __cplusplus +} +#include +#include "reinterpret.h" +#include +namespace mm { + struct mmap { + inline static mmap_t create_raw(const char* file) + { + mmap_t map; + if (!open_and_map(file, &map)) panic("Failed to map file"); + return map; + } + + inline mmap(mmap_t raw) :inner(raw){} + inline mmap(const char* file) + : inner(create_raw(file)) {} + + inline mmap(mmap&& move) : inner(move.inner) + { + mmap_t& other = move.inner; /* inner is a mutable member, so the source can be disarmed without const_cast */ + other.ptr = nullptr; /* ~mmap() skips unmap_and_close() when ptr is null */ + } + inline mmap(const mmap& copt) = delete; + + inline ~mmap() + { + if (inner.ptr) { + ::unmap_and_close(inner); + } + } + + inline const span as_span() const { return span(as_ptr(), size()); } + inline span as_span() { return span(as_ptr(), size()); } + + inline const std::uint8_t* as_ptr() const { return (const std::uint8_t*)inner.ptr; } + inline std::uint8_t* as_ptr() { return (std::uint8_t*)inner.ptr; } + + inline std::size_t size() const { 
return inner.len; } + + inline int as_fd() const { return inner.fd; } + inline const mmap_t& as_raw() const { return inner; } + private: + mmap_t inner; /* deliberately not const: the move constructor nulls the source's ptr */ + }; +} + +#undef restrict +#endif + +#endif /* _MAP_H */ diff --git a/lean/include/panic.h b/lean/include/panic.h new file mode 100644 index 0000000..3ce71d5 --- /dev/null +++ b/lean/include/panic.h @@ -0,0 +1,35 @@ +#ifndef _PANIC_H +#define _PANIC_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct panicinfo { + const char* file; + const char* function; + int line; +}; + +void _do_panic(struct panicinfo pi, const char* fmt, ...) __attribute__((noreturn)); + +#ifdef __cplusplus +extern "C++" { +#include + template + __attribute__((noreturn)) inline void _real_panic(const char* file, const char* function, int line, const char* fmt, Args&&... args) + { + panicinfo i = { file, function, line }; + _do_panic(i, fmt, std::forward(args)...); + } +#define panic(fmt, ...) _real_panic(__FILE__, __func__, __LINE__, fmt __VA_OPT__(,) __VA_ARGS__) +} +#else +#define panic(fmt, ...) 
_do_panic( (struct panicinfo){.file = __FILE__, .function = __func__, .line = __LINE__}, fmt __VA_OPT__(,) __VA_ARGS__) +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lean/include/reinterpret.h b/lean/include/reinterpret.h new file mode 120000 index 0000000..5a88a2e --- /dev/null +++ b/lean/include/reinterpret.h @@ -0,0 +1 @@ +reinterpret.hpp \ No newline at end of file diff --git a/lean/include/reinterpret.hpp b/lean/include/reinterpret.hpp new file mode 100644 index 0000000..68646c8 --- /dev/null +++ b/lean/include/reinterpret.hpp @@ -0,0 +1,61 @@ +#ifndef _REINTERPRET_H +#define _REINTERPRET_H + +#include + +#ifdef __cplusplus +#define restrict __restrict__ +#include +#include +#else +#include +#include +#endif + +#ifdef __cplusplus +template +struct span { + inline span(T* ptr, std::size_t len) : ptr(ptr), len(len) {} + + template + inline span reinterpret() const + { + auto bytes = size_bytes(); + //if (len_b % sizeof(U) != 0) panic("Cannot reinterpret T to U due to unmatch sizing constraints."); + + return span((U*)ptr, bytes / sizeof(U)); + } + + inline const T& operator[](std::size_t i) const + { + if (i >= len) panic("Out of bounds access: %lu >= %lu", i, len); + return ptr[i]; + } + inline T& operator[](std::size_t i) + { + if (i >= len) panic("Out of bounds access: %lu >= %lu", i, len); + return ptr[i]; + } + + inline const T* as_ptr() const { return ptr; } + inline T* as_ptr() { return ptr; } + + inline const T* operator&() const { return as_ptr(); } + inline T* operator&() { return as_ptr(); } + + inline std::size_t size_bytes() const { return len * sizeof(T); } + inline std::size_t size() const { return len; } +private: + T* const ptr; + const std::size_t len; +}; + +extern "C" { +#endif +uint64_t* bytes_to_long(uint8_t* ptr, size_t ptr_sz, size_t* restrict nsize); +float* bytes_to_float(uint8_t* ptr, size_t ptr_sz, size_t* restrict nsize); +#ifdef __cplusplus +} +#endif + +#endif /* _REINTERPRET_H */ diff --git a/lean/include/rng.h 
b/lean/include/rng.h new file mode 100644 index 0000000..4a19152 --- /dev/null +++ b/lean/include/rng.h @@ -0,0 +1,59 @@ +#ifndef _RNG_H +#define _RNG_H + +#include "shuffle3.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum rng_kind { + RNG_KIND_FRNG, + RNG_KIND_DRNG, + RNG_KIND_XORNG, +}; + +typedef struct rng_init +{ + enum rng_kind kind; + union { + struct { + double state[2]; + } frng; + struct { + int32_t state; + } drng; + struct { + uint64_t state[2]; + } xorng; + } init; + +} rng_init_opt; + +typedef struct rng_impl* _UNIQUE rng_t; + +rng_t rng_new(rng_init_opt kind); +#define RNG_INIT(_kind,...) ((rng_init_opt){.kind=(_kind), .init.__VA_ARGS__ }) +void rng_free(rng_t ptr); + +// Tests +extern void rng_test(); +extern void rng_test_spec(rng_t rng); + +#ifdef __cplusplus +} +// RNG interfaces +#include +#include +#include + +namespace rng { + void test_algo(RNG&& rng); + +template::value>::value __fuck> + inline void test_algo(R&& rng) { test_algo(static_cast(rng)); } +} +#endif + +#endif /* _RNG_H */ diff --git a/lean/include/rng/drng.hpp b/lean/include/rng/drng.hpp new file mode 100644 index 0000000..217fdee --- /dev/null +++ b/lean/include/rng/drng.hpp @@ -0,0 +1,22 @@ +#pragma once /* include guard, as in impl.hpp: a second inclusion would redefine struct drng */ +#include "impl.hpp" +#include +namespace rng +{ + struct drng : public RNG + { + inline drng(std::uint32_t seed) : state(seed){ + D_dprintf("drng: seeded with %u", seed); + //dprintf(" dummy run sample: %f", sample()); + } + inline drng() : drng(1){} + + static drng from_time(); + + int rand(); + protected: + double sample() override; + private: + std::uint32_t state; + }; +} diff --git a/lean/include/rng/frng.hpp b/lean/include/rng/frng.hpp new file mode 100644 index 0000000..4b9c3f0 --- /dev/null +++ b/lean/include/rng/frng.hpp @@ -0,0 +1,66 @@ +#pragma once /* include guard, as in impl.hpp: a second inclusion would redefine struct frng */ +#include "impl.hpp" +#include + +#include + +namespace rng +{ + struct frng : public RNG + { + template + static constexpr inline double dot(const std::array& v, const std::array& u) + { + double res=0; + for(std::size_t i=0;i& state) + 
{ + const constexpr std::array vec2 = { 12.9898, 78.223 }; + return fract(sin(dot(state, vec2)) * 43758.5453); + } + +#define P D_dprintf("frng: seeded with (%f, %f)", state[0], state[1]); + inline constexpr frng(double s1, double s2) : state({s1, s2}){P} + inline constexpr frng(const std::array& ar) : state(ar){P} + inline constexpr frng(std::array&& ar) : state(ar){P} + inline constexpr frng(const double (&ar)[2]) : state({ar[0], ar[1]}) {P} +#undef P + inline constexpr double next_double() override { return sample(); } + inline constexpr float next_float() override { return (float)sample(); } + protected: + inline constexpr double sample() override + { + double res = sample_double(state); + update_state(state, res); + return res; + } + private: + std::array state; + static inline constexpr void update_state(std::array& state, double r) + { + float v1 = (float)state[0]; + float v2 = (float)state[1]; + + std::array nvec = { + r, + (double)v2, + }; + + state[0] = sample_double(nvec); + + nvec[1] = (double)v1; + state[1] = sample_double(nvec); + } + }; +} + diff --git a/lean/include/rng/impl.hpp b/lean/include/rng/impl.hpp new file mode 100644 index 0000000..aa20b77 --- /dev/null +++ b/lean/include/rng/impl.hpp @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +/// Base class for RNG impls +struct RNG { + virtual unsigned char byte(); + virtual void bytes(unsigned char* ptr, std::size_t len); + + template + inline void bytes(unsigned char (&arr)[N]) { return bytes(arr, N); } + template + inline void bytes(std::array& array) { return bytes(&array[0], N); } + inline void bytes(std::vector& vec) { return bytes(&vec[0], vec.size()); } + + bool chance(); + virtual bool chance(double chance); + + virtual std::int32_t next_int(); + inline std::int32_t next_int(std::int32_t max) { return next_int(0, max); } + std::int32_t next_int(std::int32_t min, std::int32_t max); + + virtual std::int64_t next_long(); + inline std::int64_t next_long(std::int64_t max) { 
return next_long(0, max); } + std::int64_t next_long(std::int64_t min, std::int64_t max); + + inline virtual float next_float() { return (float)sample(); } + inline virtual double next_double() { return sample(); } + + virtual ~RNG() = default; +protected: + virtual double sample() = 0; +}; diff --git a/lean/include/rng/xoroshiro128plus.hpp b/lean/include/rng/xoroshiro128plus.hpp new file mode 100644 index 0000000..d0dddc1 --- /dev/null +++ b/lean/include/rng/xoroshiro128plus.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "impl.hpp" +#include + +namespace rng +{ + struct xoroshiro128plus : public RNG + { + using State = std::array; +#define P D_dprintf("xorng: seeded with (%lu, %lu)", state[0], state[1]); + inline constexpr xoroshiro128plus(std::uint64_t s0, std::uint64_t s1) : state({s0, s1}){P} + inline constexpr xoroshiro128plus(std::array&& ar) : state(ar){P} + inline constexpr xoroshiro128plus(const std::array& ar) : state(ar){P} + inline constexpr xoroshiro128plus(const std::uint64_t (&ar)[2]) : state({ar[0], ar[1]}){P} +#undef P + std::uint64_t next_ulong(); + using RNG::next_long; + std::int64_t next_long() override; + + void jump(); + void long_jump(); + protected: + double sample() override; + private: + State state; + }; +} + diff --git a/lean/include/shuffle.hpp b/lean/include/shuffle.hpp new file mode 100644 index 0000000..289f668 --- /dev/null +++ b/lean/include/shuffle.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include + +namespace rng { + template + inline void shuffle(R& rng, span span) + { + if(!span.size()) return; + fmt::print(" -> shuffling {} objects...", span.size()); + for(std::size_t i=span.size()-1;i>0;i--) + { + auto j = rng.next_long(i); + std::swap(span[i], span[j]); + } + fmt::print(" OK\n"); + } + + template + inline void unshuffle(R& rng, span span) + { + if(!span.size()) return; + std::vector rng_values(span.size()); + + fmt::print(" -> unshuffling {} objects...", span.size()); + for(std::size_t 
i=span.size()-1;i>0;i--) + rng_values.push_back(rng.next_long(i)); + + for(std::size_t i=1;i%s:%d] " fmt "\n", __func__, __LINE__ __VA_OPT__(,) __VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif +*/ +extern const char* _prog_name; + +#ifdef __cplusplus +} +#endif + +#endif /* _SHUFFLE3_H */ diff --git a/lean/include/work.h b/lean/include/work.h new file mode 100644 index 0000000..51ff16b --- /dev/null +++ b/lean/include/work.h @@ -0,0 +1,52 @@ +#ifndef _WORK_H +#define _WORK_H + +#ifdef __cplusplus +extern "C" { +#endif + +enum work_buffer_opts { + WORK_BO_CPIP=0, // Copy then run in place on output file (default) + WORK_BO_BUFFERED=1, // Read whole input into memory then perform +}; + +typedef struct _work_args { + enum { + OP_SHUFFLE_IP, // Shuffle in place + OP_SHUFFLE_OP, // Shuffle out of place + + OP_UNSHUFFLE_IP, // Unshuffle in place + OP_UNSHUFFLE_OP, // Unshuffle out of place + + OP_HELP, // Print help then exit + } op; + union { + struct { + const char* file; + } op_shuffle_ip; + + struct { + enum work_buffer_opts buffered; + const char* ifile; + const char* ofile; + } op_shuffle_op; + + struct { + const char* file; + } op_unshuffle_ip; + + struct { + enum work_buffer_opts buffered; + const char* ifile; + const char* ofile; + } op_unshuffle_op; + } data; +} work_args_t; + +int do_work(const work_args_t args); + +#ifdef __cplusplus +} +#endif + +#endif /* _WORK_H */ diff --git a/lean/src/debug.c b/lean/src/debug.c new file mode 100644 index 0000000..c830c4b --- /dev/null +++ b/lean/src/debug.c @@ -0,0 +1,19 @@ +#include +#include +#include + +#include + +#include + +void _do_dprintf(struct debuginfo info, const char* fmt, ...) 
+{ +#ifdef DEBUG + va_list li; + va_start(li, fmt); + fprintf(stderr, "["FGRN("dbg")" " FYEL("@") BOLD("%s") "->" FWHT("%s") ":" FYEL("%d") "]: ", info.file, info.function,info.line); + vfprintf(stderr, fmt, li); + fprintf(stderr, "\n"); + va_end(li); +#endif +} diff --git a/lean/src/main.c b/lean/src/main.c new file mode 100644 index 0000000..7e5e736 --- /dev/null +++ b/lean/src/main.c @@ -0,0 +1,78 @@ +#include +#include + +#include + + +#include +#include +#include +#include +#include +#include + +#include + +#define noreturn __attribute__((noreturn)) void + +_Static_assert(sizeof(float)==sizeof(uint32_t), "float is not 32 bits"); + +const char* _prog_name; + +noreturn help_then_exit() +{ + fprintf(stderr, "Try passing `-h`\n"); + exit(1); +} + +void usage() +{ + printf( "shuffle3 - 3 pass binary shuffler\n" + "Usage: %s -s \n" + "Usage: %s -u \n", _prog_name, _prog_name); + printf("\nOPTIONS:\n" + " -s\tShuffle file in place\n" + " -u\tUnshuffle file in place\n"); +} + +int main(int argc, char** argv) +{ + _prog_name = argv[0]; + + work_args_t parsed; + + if( !argv[1] || *(argv[1]) != '-') help_then_exit(); + + D_dprintf("Parsing `%c'", argv[1][1]); + switch(argv[1][1]) + { + case 's': + parsed.op = OP_SHUFFLE_IP; + if(!(parsed.data.op_shuffle_ip.file = argv[2])) + { + fprintf(stderr, "Error: -s expected file argument.\n"); + return 1; + } + D_dprintf("parsed.op = %d", OP_SHUFFLE_IP); + break; + case 'u': + parsed.op = OP_UNSHUFFLE_IP; + if(!(parsed.data.op_unshuffle_ip.file = argv[2])) + { + fprintf(stderr, "Error: -u expected file argument.\n"); + return 1; + } + D_dprintf("parsed.op = %d", OP_UNSHUFFLE_IP); + break; + case 'h': + usage(); + return 0; + default: + fprintf(stderr, "Error: unknown argument `%s'\n\n", argv[1]); + help_then_exit(); + panic("Unreachable"); + } + + return do_work(parsed); +} + diff --git a/lean/src/map.c b/lean/src/map.c new file mode 100644 index 0000000..2a245af --- /dev/null +++ b/lean/src/map.c @@ -0,0 +1,56 @@ +#define 
_GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#define FILEMODE S_IRWXU | S_IRGRP | S_IROTH + +#include + +int open_and_map(const char* file, mmap_t* restrict ptr) +{ + int fd; + struct stat st; + if ((fd = open(file, O_RDWR, FILEMODE)) < 0) { + perror("Failed to open file"); + return 0; + } + + if (fstat(fd, &st) < 0) { + perror("Failed to stat file"); + close(fd); + return 0; + } + + register struct mmap map = { .fd = fd, .ptr = NULL, .len = st.st_size }; + + if ((map.ptr = mmap(NULL, map.len, PROT_READ | PROT_WRITE, MAP_SHARED,fd, 0)) == MAP_FAILED) { + perror("mmap() failed"); + close(fd); + return 0; + } + + *ptr = map; + + return 1; +} + +int unmap_and_close(mmap_t map) +{ + register int rval=1; + if (munmap(map.ptr, map.len) < 0) { + perror("munmap() failed"); + rval=0; + } + if (close(map.fd) <0) { + perror("Failed to close fd"); + rval=0; + } + + return rval; +} diff --git a/lean/src/map_callback.cpp b/lean/src/map_callback.cpp new file mode 100644 index 0000000..3c3f7f5 --- /dev/null +++ b/lean/src/map_callback.cpp @@ -0,0 +1,11 @@ + +#include +#include +#include + + +extern "C" void* map_and_then(const char* file, map_cb callback, void* user) +{ + mm::mmap map(file); + return callback(map.as_raw(), user); +} diff --git a/lean/src/panic.c b/lean/src/panic.c new file mode 100644 index 0000000..20a10b1 --- /dev/null +++ b/lean/src/panic.c @@ -0,0 +1,18 @@ +#include +#include +#include + +#include + +#include + +__attribute__((noreturn)) void _do_panic(struct panicinfo info, const char* fmt, ...) 
+{ + va_list li; + va_start(li, fmt); + fprintf(stderr, BOLD(UNDL(FRED("[!]"))) " (" BOLD("%s") "->" BOLD(FRED("%s")) ":" FYEL("%d") ") " BOLD(FRED("fatal error")) ": ", info.file, info.function, info.line); + vfprintf(stderr, fmt, li); + fprintf(stderr, "\n"); + va_end(li); + abort(); +} diff --git a/lean/src/reinterpret.cpp b/lean/src/reinterpret.cpp new file mode 100644 index 0000000..2bf4e79 --- /dev/null +++ b/lean/src/reinterpret.cpp @@ -0,0 +1,22 @@ +#include +#include + +template +static inline T* bytes_to_t(std::uint8_t* ptr, std::size_t ptr_sz, std::size_t* restrict nsize) +{ + span bytes(ptr, ptr_sz); + auto tout = bytes.reinterpret(); + *nsize = tout.size(); + return tout.as_ptr(); +} + +extern "C" { + uint64_t* bytes_to_long(uint8_t* ptr, size_t ptr_sz, size_t* restrict nsize) + { + return bytes_to_t(ptr, ptr_sz, nsize); + } + float* bytes_to_float(uint8_t* ptr, size_t ptr_sz, size_t* restrict nsize) + { + return bytes_to_t(ptr, ptr_sz, nsize); + } +} diff --git a/lean/src/rng.cpp b/lean/src/rng.cpp new file mode 100644 index 0000000..e6d1a58 --- /dev/null +++ b/lean/src/rng.cpp @@ -0,0 +1,92 @@ +#include +#include + +#include + +unsigned char RNG::byte() /* not inline: impl.hpp declares this virtual without inline, and this is its only definition */ +{ + return (unsigned char)next_int(255); +} + +void RNG::bytes(unsigned char* ptr, std::size_t len) +{ + for(std::size_t i=0;i +#include +namespace { // C interface + using namespace std; +#define extract_ptr(ptr) ((RNG*)(ptr)) + static inline RNG& extract_ref(rng_t rng) + { + return *extract_ptr(rng); + } + template + static inline T* extract_downcast_ptr(rng_t rng) + { + return dynamic_cast(extract_ptr(rng)); + } + + extern "C" + { + + rng_t rng_new(rng_init_opt opt) + { + switch(opt.kind) + { + case RNG_KIND_FRNG: return (rng_t) new rng::frng(opt.init.frng.state); + case RNG_KIND_DRNG: return (rng_t) new rng::drng(opt.init.drng.state); + case RNG_KIND_XORNG: return (rng_t) new rng::xoroshiro128plus(opt.init.xorng.state); + default: panic("Unknown RNG init opt: %d", opt.kind); + } + return 
nullptr; + } + + void rng_free(rng_t rng) + { + RNG* ptr = (RNG*)rng; + delete ptr; + } + + void rng_test_spec(rng_t rng) + { + cout << "rng_test_spec:" << endl; + rng::test_algo(std::move(extract_ref(rng))); + } + } +} diff --git a/lean/src/rng/drng.cpp b/lean/src/rng/drng.cpp new file mode 100644 index 0000000..2512768 --- /dev/null +++ b/lean/src/rng/drng.cpp @@ -0,0 +1,21 @@ +#include +#include +#include + +#include + +namespace rng +{ + drng drng::from_time() { return drng(time(NULL)); } + + int drng::rand() + { + return rand_r(&state); + } + + double drng::sample() + { + int val = rand_r(&state); + return (double)val / (double)RAND_MAX; + } +} diff --git a/lean/src/rng/frng.cpp b/lean/src/rng/frng.cpp new file mode 100644 index 0000000..eccec41 --- /dev/null +++ b/lean/src/rng/frng.cpp @@ -0,0 +1,3 @@ +#include +#include +#include diff --git a/lean/src/rng/test.cpp b/lean/src/rng/test.cpp new file mode 100644 index 0000000..f0bad07 --- /dev/null +++ b/lean/src/rng/test.cpp @@ -0,0 +1,37 @@ +#include +#include + +namespace rng { + void test_algo(RNG&& rng) + { + using namespace std; + + for(int i=0;i<10;i++) { + double d = rng.next_double(); + long l = rng.next_long(-10, 10); + + std::array ar; + for(auto& i : ar) i = rng.chance(); + + cout << "\t(Sampled: " << d; + cout << ", Long: " << l; + cout << ", Bools: [ "; + for(const auto& i : ar) cout << i << " "; + cout << "])" << endl; + } + } +} + +extern "C" void rng_test() +{ + using namespace std; + + cout << "frng:" << endl; + rng::test_algo(rng::frng(1.0, 1.2)); + + cout << "drng:" << endl; + rng::test_algo(rng::drng(10)); + + cout << "xoroshiro128+:" << endl; + rng::test_algo(rng::xoroshiro128plus(100ul, 200ul)); +} diff --git a/lean/src/rng/xoroshiro128plus.cpp b/lean/src/rng/xoroshiro128plus.cpp new file mode 100644 index 0000000..71827f1 --- /dev/null +++ b/lean/src/rng/xoroshiro128plus.cpp @@ -0,0 +1,86 @@ +#include +#include + +using u64 = std::uint64_t; + +#define XO xoroshiro128plus + +static inline 
constexpr u64 rotl(u64 x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +namespace rng +{ + inline constexpr u64 next(XO::State& s) + { + u64 s0 = s[0]; + u64 s1 = s[1]; + u64 result = s0 + s1; + + s1 ^= s0; + s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); + s[1] = rotl(s1, 37); + + return result; + } + inline constexpr void xo_jump(XO::State& s) + { + constexpr const std::uint64_t JUMP[] = { 0xdf900294d8f554a5, 0x170865df4b3201fc }; + + std::uint64_t s0 = 0; + std::uint64_t s1 = 0; + for(u64 i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(s); + } + + s[0] = s0; + s[1] = s1; + } + + inline constexpr void xo_long_jump(XO::State& s) + { + constexpr const uint64_t LONG_JUMP[] = { 0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1 }; + + std::uint64_t s0 = 0; + std::uint64_t s1 = 0; + for(u64 i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(s); + } + + s[0] = s0; + s[1] = s1; + } + + + void XO::jump() { xo_jump(state); } + void XO::long_jump() { xo_long_jump(state); } + + std::uint64_t XO::next_ulong() + { + return next(state); + } + std:: int64_t XO::next_long() + { + const u64 v = next_ulong(); + static_assert(sizeof(v) == sizeof(decltype(next_long()))); + + return *(const std::int64_t*)&v; + } + + + double XO::sample() + { + return (next(state) & ((INT64_C(1) << 53) - 1)) * (1.00 / (INT64_C(1) << 53)); + } +} diff --git a/lean/src/work.cpp b/lean/src/work.cpp new file mode 100644 index 0000000..20c9ed4 --- /dev/null +++ b/lean/src/work.cpp @@ -0,0 +1,129 @@ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +template +std::tuple minmax_t(const span& array, Fn keep) +{ + T highest{}; /* value-initialised so the returned pair is never indeterminate when the loop assigns nothing (empty span) */ + T lowest{}; /* likewise; callers use these values as RNG seeds, so they must be deterministic */ + bool init=false; + D_dprintf("minmax_t: %p (%lu)", array.as_ptr(), array.size()); + for(std::size_t i=0;i 
highest) highest = item; + } + //fmt::print("MMX {}, {}\n", lowest, highest); + return {lowest, highest}; +} + +template +inline std::tuple minmax_t(const span& array) +{ + return minmax_t(array, [](T _val) { return true; }); +} + +namespace work +{ + /// Shuffle or unshuffle in place + template + int xshuffle_ip(const char* file) + { + mm::mmap map(file); + + if constexpr(unshuffle) + { + auto [byte_l, byte_h] = minmax_t(map.as_span().reinterpret()); + D_dprintf("MMX res (s8): %d -- %d", byte_l, byte_h); + rng::drng drng((std::int32_t) ((0xfffa << 16) | (byte_l<<7) | byte_h )); + rng::unshuffle(drng, map.as_span()); + + auto [float_l, float_h] = minmax_t(map.as_span().reinterpret(), [](float f) -> bool { return !( (f!=f) || f < -FLT_MAX || f > FLT_MAX); }); + D_dprintf("MMX res (f32): %f -- %f", float_l, float_h); + rng::frng frng(float_l, float_h); + rng::unshuffle(frng, map.as_span().reinterpret()); + + auto [long_l, long_h] = minmax_t(map.as_span().reinterpret()); + D_dprintf("MMX res (u64): %ld -- %ld", long_l, long_h); + rng::xoroshiro128plus xorng(*(const std::uint64_t*)&long_l, *(const std::uint64_t*)&long_h); + rng::unshuffle(xorng, map.as_span().reinterpret()); + } else { + auto [long_l, long_h] = minmax_t(map.as_span().reinterpret()); + D_dprintf("MMX res (u64): %ld -- %ld", long_l, long_h); + rng::xoroshiro128plus xorng(*(const std::uint64_t*)&long_l, *(const std::uint64_t*)&long_h); + rng::shuffle(xorng, map.as_span().reinterpret()); + + auto [float_l, float_h] = minmax_t(map.as_span().reinterpret(), [](float f) -> bool { return !( (f!=f) || f < -FLT_MAX || f > FLT_MAX); }); + D_dprintf("MMX res (f32): %f -- %f", float_l, float_h); + rng::frng frng(float_l, float_h); + rng::shuffle(frng, map.as_span().reinterpret()); + + auto [byte_l, byte_h] = minmax_t(map.as_span().reinterpret()); + D_dprintf("MMX res (s8): %d -- %d", byte_l, byte_h); + rng::drng drng((std::int32_t) ((0xfffa << 16) | (byte_l<<7) | byte_h )); + rng::shuffle(drng, map.as_span()); + } + 
+ return 0; + } + + /// Shuffle or unshuffle out of place + template + int xshuffle_op(const char* ifile, const char* ofile, bool is_buffered) + { + + if constexpr(unshuffle) + { + + } else { + + } + panic("Unimplemented"); + return 0; + } + +} + +int help() +{ + //Print help then exit + return 1; +} + +extern "C" int do_work(const work_args_t args) +{ + using A = decltype(args.op); + switch (args.op) { + case A::OP_SHUFFLE_IP: return work::xshuffle_ip(args.data.op_shuffle_ip.file); + case A::OP_SHUFFLE_OP: return work::xshuffle_op(args.data.op_shuffle_op.ifile, + args.data.op_shuffle_op.ofile, + args.data.op_shuffle_op.buffered == WORK_BO_BUFFERED); + case A::OP_UNSHUFFLE_IP: return work::xshuffle_ip(args.data.op_unshuffle_ip.file); + case A::OP_UNSHUFFLE_OP: return work::xshuffle_op(args.data.op_unshuffle_op.ifile, + args.data.op_unshuffle_op.ofile, + args.data.op_unshuffle_op.buffered == WORK_BO_BUFFERED); + case A::OP_HELP: return help(); + + default: panic("Unknown op %d", (int)args.op); + } + return 0; +}