part: Reworked capacity calculation (static & dynamic), growth, and bounding to be globally compile-time configurable.

TODO: Make that calculation's configuration compile-time env-var configurable in debug & test builds (for benchmarking.)
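
A rough sketch of how that `option_env!()`-based configuration could look (the env-var names and the `env_flag`/`env_usize` helpers are assumptions for illustration, not part of this commit):

```rust
// Hypothetical sketch: compile-time env-var overrides for the search-capacity settings,
// active only on debug builds. Variable names are assumptions, not the crate's.

/// Parse a compile-time flag: unset => `default`, "0" => false, anything else => true.
const fn env_flag(var: Option<&str>, default: bool) -> bool {
    match var {
        None => default,
        Some(s) => {
            let b = s.as_bytes();
            !(b.len() == 1 && b[0] == b'0')
        },
    }
}

/// Parse a compile-time decimal value: unset => `default`.
const fn env_usize(var: Option<&str>, default: usize) -> usize {
    match var {
        None => default,
        Some(s) => {
            let b = s.as_bytes();
            let mut i = 0;
            let mut n = 0usize;
            while i < b.len() {
                assert!(b[i].is_ascii_digit(), "invalid digit in env-var");
                n = n * 10 + (b[i] - b'0') as usize;
                i += 1;
            }
            n
        },
    }
}

/// Grow capacity exponentially when search fails (overridable at compile time on debug builds.)
#[cfg(debug_assertions)]
const SEARCH_CAP_GROW: bool = env_flag(option_env!("REVERSE_SEARCH_CAP_GROW"), true);
#[cfg(not(debug_assertions))]
const SEARCH_CAP_GROW: bool = true;

/// Initial search capacity (overridable at compile time on debug builds; defaults to one cache-line.)
#[cfg(debug_assertions)]
const SEARCH_CAP_SIZE: usize = env_usize(option_env!("REVERSE_SEARCH_CAP_SIZE"), CACHELINE_SIZE);
#[cfg(not(debug_assertions))]
const SEARCH_CAP_SIZE: usize = CACHELINE_SIZE;
```

With something like that in place, `REVERSE_SEARCH_CAP_GROW=0 cargo bench` could compile the searchers with growth disabled for a benchmark run without touching the source.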

Fortune for reverse's current commit: Future small blessing − 末小吉
refactor-thread-pool-no-channel-cmpext
Avril 9 months ago
parent 49e0dd6073
commit 03a3a1bfd0
Signed by: flanchan
GPG Key ID: 284488987C31F630

Cargo.lock (generated)

@@ -579,6 +579,7 @@ dependencies = [
"clap",
"color-eyre",
"crossbeam",
"crossbeam-utils",
"futures",
"lazy_static",
"mapped-file",

@@ -9,7 +9,8 @@ edition = "2018"
[features]
default = ["mapped-file", "threads"]
unstable = ["parking_lot?/nightly", "memchr/compiler_builtins", "crossbeam?/nightly", "futures?/unstable", "color-eyre/track-caller"]
# Enable nightly features.
unstable = ["parking_lot?/nightly", "memchr/compiler_builtins", "crossbeam?/nightly", "futures?/unstable", "color-eyre/track-caller", "crossbeam-utils/nightly"]
# Enables parallelising the operation where possible.
#
@@ -73,6 +74,7 @@ bytes = "1.6.0"
clap = { version = "4.5.4", features = ["derive", "string", "unicode", "wrap_help"] }
color-eyre = { version = "0.6.3", default-features = false }
crossbeam = { version = "0.8.4", optional = true, features = ["crossbeam-queue", "crossbeam-channel", "crossbeam-epoch"] }
crossbeam-utils = "0.8.19"
#crossbeam-queue = { version = "0.3.11", optional = true }
futures = { version = "0.3.30", optional = true, default-features = false, features = ["alloc", "std", "async-await"] }
lazy_static = { version = "1.4.0", features = ["spin"] }

@@ -3,6 +3,12 @@
use super::*;
use std::convert::Infallible;
pub use std::{
convert::{
TryFrom, TryInto,
},
};
/// The default bottom type.
///
/// To use the `unwrap_infallible()`-like interface, functions that return `-> !` should be changed to `-> Never`.

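For context, a minimal sketch (assumed shape, not the actual ext.rs definitions) of the `Never` bottom type and the `unwrap_infallible()`-like interface that doc comment refers to:

```rust
// Minimal sketch, assuming the usual shape of such an interface; the names mirror the
// doc comment above but the bodies here are illustrative only.
#[cfg(feature = "unstable")]
pub type Never = !; // gated behind `feature(never_type)` (the crate's `unstable` feature)
#[cfg(not(feature = "unstable"))]
pub type Never = std::convert::Infallible;

/// Unwrap a `Result` whose error type can never be constructed.
#[inline]
pub fn unwrap_infallible<T>(res: Result<T, Never>) -> T
{
    match res {
        Ok(v) => v,
        // `Never` is uninhabited, so this arm is statically unreachable.
        Err(never) => match never {},
    }
}
```

Changing diverging functions from `-> !` to `-> Never` keeps them usable where a `Never` type is expected (e.g. as the error side of `Result<T, Never>`), since `!` is not a general-purpose, nameable type on stable.
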
@@ -1,6 +1,5 @@
#![cfg_attr(feature="unstable", feature(never_type))] // See `Never`.
#[macro_use] mod ext; use ext::*;
mod part;

@@ -4,8 +4,35 @@ use std::{
num::NonZeroUsize,
};
/// Size of one cache-line.
///
/// NOTE: alignment padded for `u8`.
///
/// TODO: Make this comptime env-var configurable (`option_env!()`) on debug builds. (See `SEARCH_CAP_GROW`.)
const CACHELINE_SIZE: usize = std::mem::size_of::<crossbeam_utils::CachePadded<u8>>();
/// Grow capacity exponentially when search fails.
///
/// TODO: Make this comptime env-var settable (`option_env!()`) on debug builds.
const SEARCH_CAP_GROW: bool = true;
/// Settings for a searcher (memory search method configuration.)
///
/// The default values provided to implementors are globally controlled and (debug-build only) env-var configurable (for benchmarking purposes.)
trait SynchonousSearcher {
/// Initial search capacity.
const CAP_SIZE: usize = CACHELINE_SIZE;
/// Should the capacity be grown on failed search?
const CAP_GROW: bool = SEARCH_CAP_GROW;
}
// Default impl global compiled capacity settings for each.
impl SynchonousSearcher for SearchPar {}
impl SynchonousSearcher for SearchSeq {}
/// Midpoint searcher (forwards & backwards)
trait MidpointFBSearcher<T=u8>
trait MidpointFBSearcher<T=u8>: SynchonousSearcher
{
fn search_forward<'a>(&self, haystack: &'a [T], needle: T) -> Option<&'a T>;
fn search_backward<'a>(&self, haystack: &'a [T], needle: T) -> Option<&'a T>;
@@ -16,9 +43,27 @@ trait MidpointFBSearcher<T=u8>
/// Search the pivot for the needle sequentially.
///
/// The order of operations will be: `search_forward()?, search_backward()`.
#[derive(Debug)]
#[derive(Debug, Clone, Default)]
struct SearchSeq;
#[inline]
fn get_max_cap_for_search_area(size: usize) -> Option<NonZeroUsize>
{
SYS_PAGE_SIZE.and_then(move |page| if size == 0 {
// Size is unknown, bound by page.
Some(page)
} else if size >= (page.get() << 2) {
// Size is huge (at least 4 pages), bound by two pages.
NonZeroUsize::new(page.get() << 1)
} else if size >= page.get() {
// Size is larger than page, bound by page.
Some(page)
} else {
// If the area size is lower than one page, do not bound the capacity growth.
None
})
}
impl MidpointFBSearcher<u8> for SearchSeq
{
#[inline(always)]
@@ -32,16 +77,7 @@ impl MidpointFBSearcher<u8> for SearchSeq
#[inline]
fn search_combined<'a>(&self, haystack: &'a [u8], begin: usize, needle: u8) -> Option<&'a u8> {
let max_cap = match get_max_pivot_search_area(haystack.len() > (DEFAULT_PIVOT_MAX_SEARCH_AREA * DEFAULT_MEM_DETECT_HUGE_SIZE_PAGES)){ // Assume huge-page memory if len is larger than 4 pages.
// On debug builds, cap search area to one system page only.
_ignore if cfg!(debug_assertions) && (*REAL_PAGE_SIZE) > 0 =>
// SAFETY: We have checked if `*REAL_PAGE_SIZE` is non-zero above.
Some(unsafe { NonZeroUsize::new_unchecked(*REAL_PAGE_SIZE as usize) }),
// Otherwise, use the detected value.
cap => cap,
};
let max_cap = get_max_cap_for_search_area(haystack.len());
match haystack.split_at(begin) {
([], []) => None,
@@ -56,7 +92,12 @@ impl MidpointFBSearcher<u8> for SearchSeq
(mut x, mut y) => {
let len = std::cmp::min(x.len(), y.len());
let mut cap = std::cmp::min(len, DEFAULT_PIVOT_MAX_SEARCH_AREA);
let mut cap = std::cmp::min(len, Self::CAP_SIZE);
if let Some(&max) = max_cap.as_ref() {
// Bound `cap` to `max_cap` if it is set.
cap = std::cmp::min(cap, max.get());
}
while cap <= len {
// If cap is too large for one (or more) of the buffers, truncate it.
@@ -77,8 +118,11 @@ impl MidpointFBSearcher<u8> for SearchSeq
y = &y[cap..];
// Cut out `cap` bytes from the end of backwards.
x = &x[..cap];
// Grow `cap` by 1 ^2 (not passing `max_cap` if there is one set.)
cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1);
if Self::CAP_GROW {
// Double `cap` (not exceeding `max_cap` if one is set.)
cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1);
}
}
None
}
@@ -109,23 +153,13 @@ const _TODO_FUTURES_JOIN2_ASYNC_SEARCH: () = {
/// Search in parallel.
///
/// # Warning
/// This search operation is heavy. It **always** spawns its own (up to) two threads when `search_combined()` is invoked. This may not be ideal...
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SearchPar
{
cap_start: usize,
}
/// For f/b pivot-searching, the max area for each operation to attempt.
const DEFAULT_PIVOT_MAX_SEARCH_AREA: usize = 1024;
/// For f/b pivot-searching, the max *possible* area for each operation to attempt when it grows in capacity
const DEFAULT_PIVOT_MAX_POSSIBLE_SEARCH_AREA: usize = (1024 * 1024 * 1024) * 2; // 2GB
/// The number of pages of memory loaded where non-page-bound operations assume they're using HP-mapped data.
const DEFAULT_MEM_DETECT_HUGE_SIZE_PAGES: usize = 4;
/// This search operation is heavy. It **always** spawns its own 2nd thread when `search_combined()` is invoked.
/// This may not be ideal... A lighter, thread-pool (async) or thread-reusing (sync) API would be better. (See below.)
#[derive(Debug, Clone, Default)]
struct SearchPar;
lazy_static::lazy_static! {
/// Real system page size.
/// Real system page size (raw.)
static ref REAL_PAGE_SIZE: std::ffi::c_int = {
use std::ffi::c_int;
extern "C" {
@@ -135,72 +169,19 @@ lazy_static::lazy_static! {
getpagesize()
}
};
}
/// Get a recommended bound for the pivot search area (if there is one.)
///
/// # Returns
/// The recommended max bound for a pivot search area, or `None` for unbounded search.
///
/// # Page kinds
/// If the operation is using huge-page mapped memory, set `use_hp` to true.
#[inline]
fn get_max_pivot_search_area(_use_hp: bool) -> Option<NonZeroUsize>
{
use std::ffi::c_int;
lazy_static::lazy_static! {
static ref PAGE_SIZE: usize = {
match *REAL_PAGE_SIZE {
c_int::MIN..=0 => DEFAULT_PIVOT_MAX_SEARCH_AREA,
// Very large (hp)
very_large if very_large as usize > DEFAULT_PIVOT_MAX_POSSIBLE_SEARCH_AREA => 0,
// Large (limit to upper bound of non-hp)
large if large as usize >= DEFAULT_PIVOT_MAX_SEARCH_AREA => std::cmp::min(large as usize, DEFAULT_PIVOT_MAX_POSSIBLE_SEARCH_AREA),
// Smaller than default bound
small => small as usize
/// System page size.
///
/// If the page size returned from `getpagesize()` (`REAL_PAGE_SIZE`) was invalid (less-than-or-equal to 0,) `None` will be returned.
static ref SYS_PAGE_SIZE: Option<NonZeroUsize> = {
match *REAL_PAGE_SIZE {
std::ffi::c_int::MIN..=0 => None,
// SAFETY: We have masked out `0` in the above branch.
rest => unsafe {
debug_assert!(usize::try_from(rest).is_ok(), "Page size `c_int` out of range of system `usize`??? (Got {})", rest);
Some(NonZeroUsize::new_unchecked(rest as usize))
}
};
}
//XXX: Should we return a different value if `use_hp` is enabled? ("using hugepage")
NonZeroUsize::new(*PAGE_SIZE)
}
impl SearchPar {
#[inline(always)]
pub const fn new() -> Self
{
Self::with_capacity(DEFAULT_PIVOT_MAX_SEARCH_AREA)
}
#[inline]
pub const fn with_capacity(cap_start: usize) -> Self
{
Self { cap_start }
}
#[inline]
pub const fn cap(&self) -> usize
{
self.cap_start
}
#[inline(always)]
pub unsafe fn cap_mut(&mut self) -> &mut usize
{
&mut self.cap_start
}
}
impl Default for SearchPar
{
#[inline]
fn default() -> Self
{
Self {
cap_start: match *REAL_PAGE_SIZE {
std::ffi::c_int::MIN..=0 => DEFAULT_PIVOT_MAX_SEARCH_AREA,
above_zero => above_zero as usize,
},
}
}
};
}
#[cfg(feature="threads")]
@@ -225,20 +206,13 @@ impl MidpointFBSearcher<u8> for SearchPar
let (mut hb, mut hf) = haystack.split_at(begin);
let max_cap = match get_max_pivot_search_area(hf.len() > (DEFAULT_PIVOT_MAX_SEARCH_AREA * DEFAULT_MEM_DETECT_HUGE_SIZE_PAGES)){ // Assume huge-page memory if len is larger than 4 pages.
// On debug builds, cap search area to one system page only.
_ignore if cfg!(debug_assertions) && (*REAL_PAGE_SIZE) > 0 =>
// SAFETY: We have checked if `*REAL_PAGE_SIZE` is non-zero above.
Some(unsafe { NonZeroUsize::new_unchecked(*REAL_PAGE_SIZE as usize) }),
// Otherwise, use the detected value.
cap => cap,
};
let max_cap = get_max_cap_for_search_area(haystack.len());
// Cap the cap to `max_cap` if there is a max cap.
let cap = if let Some(max) = max_cap.as_ref() {
std::cmp::min(max.get(), self.cap_start)
std::cmp::min(max.get(), Self::CAP_SIZE)
} else {
self.cap_start
Self::CAP_SIZE
};
let forward = if hf.len() > 0 {
@@ -267,8 +241,10 @@ impl MidpointFBSearcher<u8> for SearchPar
}
// Cut out `cap` bytes from the start.
hf = &hf[cap..];
// Grow `cap` by 1 ^2 (not passing `max_cap` if there is one set.)
cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1);
if Self::CAP_GROW {
// Double `cap` (not exceeding `max_cap` if one is set.)
cap = max_cap.map(|max| std::cmp::min(max.get(), cap << 1)).unwrap_or_else(|| cap << 1);
}
}
None::<&'a u8>
}).expect("Failed to spawn forward-searcher thread"))
@@ -355,6 +331,8 @@ where S: MidpointFBSearcher<u8>
#[cfg(test)]
mod test
{
//TODO: Add a generic randomised lorem-ipsum-like text data generator & a generic assertion tester that can take a unique `MidpointFBSearcher`.
#[test]
fn partition_seq()
{

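On the test-module TODO above: a rough sketch of what a generic randomised assertion tester over any `MidpointFBSearcher` might look like (hypothetical, not part of this commit; the PRNG, bounds, and names are placeholders):

```rust
// Hypothetical sketch (would live inside `mod test` above): pseudo-random ASCII
// haystacks fed to any `MidpointFBSearcher` implementor, with a basic sanity assertion.
fn check_searcher<S: MidpointFBSearcher<u8> + Default>(runs: usize)
{
    // Tiny deterministic xorshift PRNG, so the test needs no extra dependencies.
    let mut state = 0x9e37_79b9_7f4a_7c15u64;
    let mut next = move || {
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        state
    };
    let searcher = S::default();
    for _ in 0..runs {
        // Random haystack of lowercase ASCII, random pivot, random needle.
        let len = (next() % 8192) as usize + 1;
        let haystack: Vec<u8> = (0..len).map(|_| b'a' + (next() % 26) as u8).collect();
        let begin = (next() as usize) % len;
        let needle = b'a' + (next() % 26) as u8;
        // Sanity: whatever the searcher returns must be a byte that actually matches the needle.
        if let Some(&found) = searcher.search_combined(&haystack, begin, needle) {
            assert_eq!(found, needle, "searcher returned a non-matching byte");
        }
    }
}
```

A dedicated test could then call e.g. `check_searcher::<SearchSeq>(256)` and, when the `threads` feature is enabled, `check_searcher::<SearchPar>(256)`.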