Review notes (free text ahead of the first `diff --git` header; not part of any hunk).

What the patch does: adds include/comp.h and src/comp.c (searching for a mapped
`needle` inside mapped haystacks), adds `map_preload()` with `MAP_ERR_ADVISE`,
adds the `pINOUT` / `_INOUT` macros, adds exit codes for match failures, and
leaves TODOs in src/main.c for the haystack/thread-pool work.

Fixes applied in this revision:
  * src/comp.c, cmp_find(): `debug_assert(substr > start)` rejected a valid match
    at offset 0; the check is now `>=`.
  * src/comp.c, cmp_find_many(): returned from inside the loop without calling
    `va_end()`; it now breaks out of the loop and always reaches `va_end()`.
  * src/map.c, map_preload(): OR'd `MADV_WILLNEED` with `MADV_RANDOM` /
    `MADV_SEQUENTIAL`. `madvise()` takes a single advice value per call, so the
    two hints are now issued by two calls.
  * include/project.h: the match-failure macro re-used the name
    `PROG_RET_MAP_HAYSTACK_N_FAILED`, which is already defined just above with a
    different value; it is renamed `PROG_RET_MATCH_HAYSTACK_N_FAILED`.

NOTE(review) - assumptions to confirm before applying:
  * The text this was rebuilt from had lost its line breaks, its indentation and
    everything between `<` and `>`. Line breaks and blank lines were rebuilt from
    the hunk line counts; indentation is assumed to be tabs.
  * The operands of the added `#include` lines and the head of the loop in
    cmp_find_many() were rebuilt from the names each file uses.
  * The two context `#include` lines in the first src/main.c hunk could not be
    recovered and are left as extracted; restore them from the target file.
  * Hunk sizes were recomputed for the changed hunks. The `index` blob ids are
    those of the original patch and no longer describe the new contents.

diff --git a/include/comp.h b/include/comp.h
new file mode 100644
index 0000000..ab52503
--- /dev/null
+++ b/include/comp.h
@@ -0,0 +1,17 @@
+#ifndef _COMP_H
+#define _COMP_H
+
+#include <macros.h>
+#include <map.h>
+
+/// Find `needle` in `haystack`. If a match is found, sets `pos` to the offset of the start of the match from `haystack`'s `origin` and returns `true`; otherwise returns `false` (and `pos` is not written).
+bool cmp_find(const map_t *pIN needle, const map_t *pIN haystack, usize *pOUT pos);
+/// Check `needle` against `nhaystacks` number of haystack maps, store the offset in `sizes[n]` where `n` is the haystack number.
+/// `sizes` must be at least `nhaystacks` long. The haystacks themselves are expected to be of type `const map_t* pIN`
+/// Returns `-1` if all haystacks passed match, otherwise returns the index of the haystack that failed.
+///
+/// # Panics
+/// Will `FATAL` if any of the haystack pointers are `NULL`, or if any haystack's `origin` is `NULL`.
+int cmp_find_many(usize nhaystacks; const map_t *pIN needle, usize sizes[pOUT nhaystacks], usize nhaystacks, ...);
+
+#endif /* _COMP_H */
diff --git a/include/macros.h b/include/macros.h
index 6c04166..662bba8 100644
--- a/include/macros.h
+++ b/include/macros.h
@@ -44,10 +44,12 @@
 // Used like: `int *pOUT output, const int *pIN input`
 #define pOUT restrict
 #define pIN const restrict
+#define pINOUT restrict
 
 // Used like: `int _OUT output, const int _IN input`
 #define _IN *pIN
 #define _OUT *pOUT
+#define _INOUT *pINOUT
 
 // Type macros
 
diff --git a/include/map.h b/include/map.h
index 76c7938..96e5159 100644
--- a/include/map.h
+++ b/include/map.h
@@ -14,12 +14,16 @@ typedef enum mm_err {
 	MAP_ERR_OPEN,
 	MAP_ERR_CLOSE,
 	MAP_ERR_STAT,
+	MAP_ERR_ADVISE, // madvise() failed (returned by `map_preload()`)
 } map_result_t;
 
 typedef union memory_map {
 	// Common
 	struct {
-		void* origin;
+		union {
+			void* origin;
+			u8* area; // shares storage with `origin`; same address typed as bytes
+		};
 		usize len;
 	};
 	// Specific to anon / file
@@ -42,6 +46,8 @@ map_result_t map_fd(int fd, bool write, usize size, off_t offset, map_t *pOUT ma
 map_result_t map_file(const char* file, bool write, usize size, off_t offset, map_t *pOUT map);
 map_result_t map_anon(void* ptr, usize len, map_t *pOUT map);
 map_result_t map_free(map_t map);
+/// Advise the kernel (via `madvise()`) that `map` will be needed soon and read randomly (`random == true`) or sequentially. Returns `MAP_ERR_ADVISE` on failure, otherwise `MAP_SUCCESS`.
+map_result_t map_preload(map_t *pINOUT map, bool random);
 
 const char* map_error_str(map_result_t res);
 
diff --git a/include/project.h b/include/project.h
index 8fd34c8..d90300d 100644
--- a/include/project.h
+++ b/include/project.h
@@ -35,6 +35,10 @@
 #define PROG_RET_MAP_HAYSTACK_N_FAILED(h) ((h) << 2)
 // `h` is the number in argv[] of the haystack file
 #define PROG_RET_UNMAP_HAYSTACK_N_FAILED(h) (((h) << 2) | 1)
+// Match of haystack number `h` failed.
+#define PROG_RET_MATCH_HAYSTACK_N_FAILED(h) (((h) << 2) | 2)
+// Bitwise OR this with the number(s) of the haystack that match failed. It must not exceed INT_MAX-1
+#define PROG_RET_MAP_HAYSTACK_FAILED AS((AS(INT_MAX, u64) >> 1lu) ^ AS(INT_MAX, u64), int)
 // Unmapping of argv[1] failed
 #define PROG_RET_UNMAP_NEEDLE_FAILED 3
 // Internal error
diff --git a/src/comp.c b/src/comp.c
new file mode 100644
index 0000000..436e188
--- /dev/null
+++ b/src/comp.c
@@ -0,0 +1,49 @@
+#define _GNU_SOURCE
+
+#include <string.h>
+#include <stdarg.h>
+
+#include <macros.h>
+#include <map.h>
+#include <comp.h>
+
+
+/// Search `haystack` for the first occurrence of the bytes mapped by `needle` (via `memmem()`).
+/// On a match, writes the byte offset of the match from `haystack->origin` to `*pos` and returns `true`.
+/// Returns `false` (leaving `*pos` untouched) when there is no match.
+bool cmp_find(const map_t *pIN needle, const map_t *pIN haystack, usize *pOUT pos)
+{
+	u8* start = haystack->origin;
+	u8* substr = memmem(start, haystack->len,
+			needle->origin, needle->len);
+	if(!substr) return false;
+	// A match at the very start of the haystack is valid, so offset 0 must pass this check.
+	debug_assert(substr >= start);
+	*pos = (usize) (substr - start);
+	return true;
+}
+
+/// Run `cmp_find()` for `needle` against each of the `nhaystacks` variadic `const map_t*` arguments, in order.
+/// The match offset for haystack `i` is written to `sizes[i]`.
+/// Returns `-1` when every haystack matched, otherwise the index of the first haystack that did not match (later haystacks are not searched).
+///
+/// Will `FATAL` if a haystack pointer, or its `origin`, is `NULL`.
+int cmp_find_many(usize nhaystacks; const map_t *pIN needle, usize sizes[pOUT nhaystacks], usize nhaystacks, ...)
+{
+	int failed = -1;
+	va_list v_haystacks;
+	va_start(v_haystacks, nhaystacks);
+
+	for(usize i=0;i<nhaystacks;i++)
+	{
+		const map_t* pINOUT haystack = va_arg(v_haystacks, const map_t*);
+		if(!haystack || !haystack->origin) FATAL("haystack %lu was null or its origin was null", i);
+		if(!cmp_find(needle, haystack, sizes + i)) {
+			failed = (int)i;
+			break; // leave through `va_end()` below rather than returning with the list still open
+		}
+	}
+
+	va_end(v_haystacks);
+	return failed;
+}
diff --git a/src/main.c b/src/main.c
index dc36a00..6025dc7 100644
--- a/src/main.c
+++ b/src/main.c
@@ -13,6 +13,7 @@
 
 #include
 #include
+#include <comp.h>
 
 void prog_info(FILE* out)
 {
@@ -60,8 +61,16 @@ inv_args:
 	map_t needle;
 	INFO("Mapping needle file `%s'", argv[1]);
 	if(!map_handle_err(map_file(argv[1], false, 0, 0, &needle))) return PROG_RET_MAP_NEEDLE_FAILED;
+	if(!map_handle_err(map_preload(&needle, false))) WARN("Failed to advise kernel about memory access: needle");
 
-	
+	//TODO: Map argv[2...]
+	//TODO: Setup thread-pool.
+	//TODO: Dispatch haystack maps to threadpool.
+	//TODO: Use either `cmp_find()` or `cmp_find_many()` to find the `needle` (mapped above) within those haystacks.
+	//TODO: Within the threadpool: output information regarding each match/nonmatch.
+	//TODO: Join the threadpool and consolidate results.
+
+	//TODO: Should we return an error (`PROG_RET_MAP_HAYSTACK_N_FAILED(n)`) if one (or more) of the haystacks fail? Or consolidate multiple failures into `PROG_RET_MAP_HAYSTACK_FAILED | (failures)`? The latter would be the most complete.
 	if(!map_handle_err(map_free(needle))) return PROG_RET_UNMAP_NEEDLE_FAILED;
 
 	TRACE("main end");
diff --git a/src/map.c b/src/map.c
index 74c9122..79d0853 100644
--- a/src/map.c
+++ b/src/map.c
@@ -84,6 +84,19 @@ map_result_t map_free(map_t map)
 	return MAP_SUCCESS;
 }
 
+/// Advise the kernel that the pages of `map` will be needed soon, and how they will be accessed.
+/// `random` selects `MADV_RANDOM`; otherwise `MADV_SEQUENTIAL` is used.
+/// Returns `MAP_ERR_ADVISE` if either `madvise()` call fails, otherwise `MAP_SUCCESS`.
+map_result_t map_preload(map_t *pINOUT map, bool random)
+{
+	TRACE("preloading map %p (%p->%lu) for immediate %s access", map, map->origin, map->len, random ? "random" : "sequential");
+
+	// `madvise()` takes exactly one advice value per call: the `MADV_*` constants are distinct values, not bit flags, so they cannot be OR'd together.
+	if(madvise(map->origin, map->len, random ? MADV_RANDOM : MADV_SEQUENTIAL) != 0) return MAP_ERR_ADVISE;
+	if(madvise(map->origin, map->len, MADV_WILLNEED) != 0) return MAP_ERR_ADVISE;
+	return MAP_SUCCESS;
+}
+
 const char* map_error_str(map_result_t res)
 {
 	switch(res)
@@ -94,6 +107,7 @@ const char* map_error_str(map_result_t res)
 		CASE(MAP_ERR_OPEN, "open() failed (mapping a filepath)");
 		CASE(MAP_ERR_CLOSE, "close() failed (freeing a file-map)");
 		CASE(MAP_ERR_STAT, "fstat() failed: (mapping a full filepath)");
+		CASE(MAP_ERR_ADVISE, "madvise() failed: (preloading mapped file)");
 		CASE(MAP_SUCCESS, "success");
 
 #undef CASE