diff --git a/Cargo.lock b/Cargo.lock index 3f99d76..e872d27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -579,6 +579,7 @@ dependencies = [ "clap", "color-eyre", "crossbeam", + "crossbeam-utils", "futures", "lazy_static", "mapped-file", diff --git a/Cargo.toml b/Cargo.toml index 212e9e3..23b9c4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,8 @@ edition = "2018" [features] default = ["mapped-file", "threads"] -unstable = ["parking_lot?/nightly", "memchr/compiler_builtins", "crossbeam?/nightly", "futures?/unstable", "color-eyre/track-caller"] +# Enable nightly features. +unstable = ["parking_lot?/nightly", "memchr/compiler_builtins", "crossbeam?/nightly", "futures?/unstable", "color-eyre/track-caller", "crossbeam-utils/nightly"] # Enables parallelising the operation where possible. # @@ -73,6 +74,7 @@ bytes = "1.6.0" clap = { version = "4.5.4", features = ["derive", "string", "unicode", "wrap_help"] } color-eyre = { version = "0.6.3", default-features = false } crossbeam = { version = "0.8.4", optional = true, features = ["crossbeam-queue", "crossbeam-channel", "crossbeam-epoch"] } +crossbeam-utils = "0.8.19" #crossbeam-queue = { version = "0.3.11", optional = true } futures = { version = "0.3.30", optional = true, default-features = false, features = ["alloc", "std", "async-await"] } lazy_static = { version = "1.4.0", features = ["spin"] } diff --git a/src/ext.rs b/src/ext.rs index 4f4cfe2..14ac938 100644 --- a/src/ext.rs +++ b/src/ext.rs @@ -3,6 +3,12 @@ use super::*; use std::convert::Infallible; +pub use std::{ + convert::{ + TryFrom, TryInto, + }, +}; + /// The default bottom type. /// /// To use the `unwrap_infallible()`-like interface, functions that return `-> !` should be changed to `-> Never`. diff --git a/src/main.rs b/src/main.rs index 914b3e4..62b54d8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,5 @@ #![cfg_attr(feature="unstable", feature(never_type))] // See `Never`. - #[macro_use] mod ext; use ext::*; mod part; diff --git a/src/part.rs b/src/part.rs index b4c4cc3..826898f 100644 --- a/src/part.rs +++ b/src/part.rs @@ -4,8 +4,35 @@ use std::{ num::NonZeroUsize, }; +/// Size of one cache-line. +/// +/// NOTE: alignment padded for `u8`. +/// +/// TODO: Make this comptime env-var configurable (`option_env!()`) on debug builds. (See `SEARCH_CAP_GROW`.) +const CACHELINE_SIZE: usize = std::mem::size_of::>(); + +/// Grow capacity exponentially when search fails. +/// +/// TODO: Make this comptime env-var settable (`option_env!()`) on debug builds. +const SEARCH_CAP_GROW: bool = true; + +/// Settings for a searcher (memory search method configuration.) +/// +/// The default values provided to implementors are globally controlled and (debug-build only) env-var configurable (for benchmarking purposes.) +trait SynchonousSearcher { + /// Initial size of capacity + const CAP_SIZE: usize = CACHELINE_SIZE; + + /// Should the capacity be grown on failed search? + const CAP_GROW: bool = SEARCH_CAP_GROW; +} + +// Default impl global compiled capacity settings for each. +impl SynchonousSearcher for SearchPar {} +impl SynchonousSearcher for SearchSeq {} + /// Midpoint searcher (forwards & backwards) -trait MidpointFBSearcher +trait MidpointFBSearcher: SynchonousSearcher { fn search_forward<'a>(&self, haystack: &'a [T], needle: T) -> Option<&'a T>; fn search_backward<'a>(&self, haystack: &'a [T], needle: T) -> Option<&'a T>; @@ -16,9 +43,27 @@ trait MidpointFBSearcher /// Search the pivot for the needle sequentially. /// /// The order of operations will be: `search_forward()?, search_backward()`. -#[derive(Debug)] +#[derive(Debug, Clone, Default)] struct SearchSeq; +#[inline] +fn get_max_cap_for_search_area(size: usize) -> Option +{ + SYS_PAGE_SIZE.and_then(move |page| if size == 0 { + // Size is unknown, bound by page. + Some(page) + } else if size >= (page.get() << 2) { + // Size is huge, bound by page ^2 + NonZeroUsize::new(page.get() << 1) + } else if size >= page.get() { + // Size is larger than page, bound by page. + Some(page) + } else { + // If the area size is lower than one page, do not bound the capacity growth. + None + }) +} + impl MidpointFBSearcher for SearchSeq { #[inline(always)] @@ -32,16 +77,7 @@ impl MidpointFBSearcher for SearchSeq #[inline] fn search_combined<'a>(&self, haystack: &'a [u8], begin: usize, needle: u8) -> Option<&'a u8> { - let max_cap = match get_max_pivot_search_area(haystack.len() > (DEFAULT_PIVOT_MAX_SEARCH_AREA * DEFAULT_MEM_DETECT_HUGE_SIZE_PAGES)){ // Assume huge-page memory if len is larger than 4 pages. - - // On debug builds, cap search area to one system page only. - _ignore if cfg!(debug_assertions) && (*REAL_PAGE_SIZE) > 0 => - // SAFETY: We have checked if `*REAL_PAGE_SIZE` is non-zero above. - Some(unsafe { NonZeroUsize::new_unchecked(*REAL_PAGE_SIZE as usize) }), - // Otherwise, use the detected value. - cap => cap, - }; - + let max_cap = get_max_cap_for_search_area(haystack.len()); match haystack.split_at(begin) { ([], []) => None, @@ -56,7 +92,12 @@ impl MidpointFBSearcher for SearchSeq (mut x, mut y) => { let len = std::cmp::min(x.len(), y.len()); - let mut cap = std::cmp::min(len, DEFAULT_PIVOT_MAX_SEARCH_AREA); + let mut cap = std::cmp::min(len, Self::CAP_SIZE); + + if let Some(&max) = max_cap.as_ref() { + // Bound `cap` to `max_cap` if it is set. + cap = std::cmp::min(cap, max.get()); + } while cap <= len { // If cap is too large for one (or more) of the buffers, truncate it. @@ -77,8 +118,11 @@ impl MidpointFBSearcher for SearchSeq y = &y[cap..]; // Cut out `cap` bytes from the end of backwards. x = &x[..cap]; - // Grow `cap` by 1 ^2 (not passing `max_cap` if there is one set.) - cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1); + + if Self::CAP_GROW { + // Grow `cap` by 1 ^2 (not passing `max_cap` if there is one set.) + cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1); + } } None } @@ -109,23 +153,13 @@ const _TODO_FUTURES_JOIN2_ASYNC_SEARCH: () = { /// Search in parallel. /// /// # Warning -/// This search operation is heavy. It **always** spawns its own (up to) two threads when `search_combined()` is invoked. This may not be ideal... -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -struct SearchPar -{ - cap_start: usize, -} - -/// For f/b pivot-searching, the max area for each operation to attempt. -const DEFAULT_PIVOT_MAX_SEARCH_AREA: usize = 1024; -/// For f/b pivot-searching, the max *possible* area for each operation to attempt when it grows in capacity -const DEFAULT_PIVOT_MAX_POSSIBLE_SEARCH_AREA: usize = (1024 * 1024 * 1024) * 2; // 2GB - -/// The number of pages of memory loaded where non-page-bound operations assume they're using HP-mapped data. -const DEFAULT_MEM_DETECT_HUGE_SIZE_PAGES: usize = 4; +/// This search operation is heavy. It **always** spawns its own 2nd thread when `search_combined()` is invoked. +/// This may not be ideal... A lighter, thread-pool (async) or thread-reusing (sync) API would be better. (See below.) +#[derive(Debug, Clone, Default)] +struct SearchPar; lazy_static::lazy_static! { - /// Real system page size. + /// Real system page size (raw.) static ref REAL_PAGE_SIZE: std::ffi::c_int = { use std::ffi::c_int; extern "C" { @@ -135,72 +169,19 @@ lazy_static::lazy_static! { getpagesize() } }; -} -/// Get a recommended bound for the pivot search area (if there is one.) -/// -/// # Returns -/// The recommended max bound for a pivot search area, or `None` for unbounded search. -/// -/// # Page kinds -/// If the operation is using huge-page mapped memory, set `use_hp` to true. -#[inline] -fn get_max_pivot_search_area(_use_hp: bool) -> Option -{ - use std::ffi::c_int; - lazy_static::lazy_static! { - static ref PAGE_SIZE: usize = { - match *REAL_PAGE_SIZE { - c_int::MIN..=0 => DEFAULT_PIVOT_MAX_SEARCH_AREA, - // Very large (hp) - very_large if very_large as usize > DEFAULT_PIVOT_MAX_POSSIBLE_SEARCH_AREA => 0, - // Large (limit to upper bound of non-hp) - large if large as usize >= DEFAULT_PIVOT_MAX_SEARCH_AREA => std::cmp::min(large as usize, DEFAULT_PIVOT_MAX_POSSIBLE_SEARCH_AREA), - // Smaller than default bound - small => small as usize + /// System page size. + /// + /// If the page size returned from `getpagesize()` (`REAL_PAGE_SIZE`) was invalid (below-or-equal to 0,) `None` will be returned. + static ref SYS_PAGE_SIZE: Option = { + match *REAL_PAGE_SIZE { + std::ffi::c_int::MIN..=0 => None, + // SAFETY: We have masked out `0` in the above branch. + rest => unsafe { + debug_assert!(usize::try_from(rest).is_ok(), "Page size `c_int` out of range of system `usize`??? (Got {})", rest); + Some(NonZeroUsize::new_unchecked(rest as usize)) } - - }; - } - //XXX: Should we return a different value if `use_hp` is enabled? ("using hugepage") - NonZeroUsize::new(*PAGE_SIZE) -} - -impl SearchPar { - #[inline(always)] - pub const fn new() -> Self - { - Self::with_capacity(DEFAULT_PIVOT_MAX_SEARCH_AREA) - } - #[inline] - pub const fn with_capacity(cap_start: usize) -> Self - { - Self { cap_start } - } - #[inline] - pub const fn cap(&self) -> usize - { - self.cap_start - } - - #[inline(always)] - pub unsafe fn cap_mut(&mut self) -> &mut usize - { - &mut self.cap_start - } -} - -impl Default for SearchPar -{ - #[inline] - fn default() -> Self - { - Self { - cap_start: match *REAL_PAGE_SIZE { - std::ffi::c_int::MIN..=0 => DEFAULT_PIVOT_MAX_SEARCH_AREA, - above_zero => above_zero as usize, - }, } - } + }; } #[cfg(feature="threads")] @@ -225,20 +206,13 @@ impl MidpointFBSearcher for SearchPar let (mut hb, mut hf) = haystack.split_at(begin); - let max_cap = match get_max_pivot_search_area(hf.len() > (DEFAULT_PIVOT_MAX_SEARCH_AREA * DEFAULT_MEM_DETECT_HUGE_SIZE_PAGES)){ // Assume huge-page memory if len is larger than 4 pages. - - // On debug builds, cap search area to one system page only. - _ignore if cfg!(debug_assertions) && (*REAL_PAGE_SIZE) > 0 => - // SAFETY: We have checked if `*REAL_PAGE_SIZE` is non-zero above. - Some(unsafe { NonZeroUsize::new_unchecked(*REAL_PAGE_SIZE as usize) }), - // Otherwise, use the detected value. - cap => cap, - }; + let max_cap = get_max_cap_for_search_area(haystack.len()); + // Cap the cap to `max_cap` if there is a max cap. let cap = if let Some(max) = max_cap.as_ref() { - std::cmp::min(max.get(), self.cap_start) + std::cmp::min(max.get(), Self::CAP_SIZE) } else { - self.cap_start + Self::CAP_SIZE }; let forward = if hf.len() > 0 { @@ -267,8 +241,10 @@ impl MidpointFBSearcher for SearchPar } // Cut out `cap` bytes from the start. hf = &hf[cap..]; - // Grow `cap` by 1 ^2 (not passing `max_cap` if there is one set.) - cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1); + if Self::CAP_GROW { + // Grow `cap` by 1 ^2 (not passing `max_cap` if there is one set.) + cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1); + } } None::<&'a u8> }).expect("Failed to spawn forward-searcher thread")) @@ -355,6 +331,8 @@ where S: MidpointFBSearcher #[cfg(test)] mod test { + //TODO: Add a generic randomised lorem-ipsum-like text data generator & a generic assertion tester that can take a unique `MidpointFBSearcher`. + #[test] fn partition_seq() {