Compare commits


24 Commits

.gitignore

@ -1,3 +1,9 @@
obj/
prof/
fcmp-*
test/
perf.*
flamegraph.svg
Cargo.lock
target

@ -3,18 +3,34 @@ INCLUDE=include
PROJECT=fcmp
OPT_FLAGS+= -fgraphite
OPT_FLAGS_RUST?= -C target-cpu=native
OPT_FLAGS?= -march=native -fgraphite -fopenmp -floop-parallelize-all -ftree-parallelize-loops=4 \
-floop-interchange -ftree-loop-distribution -floop-strip-mine -floop-block
RELEASE_CFLAGS?= -O3 -march=native -flto $(OPT_FLAGS)
FEAT_RUST?= threads
FEAT_CFLAGS?= -D_RUN_THREADED=0
FEAT_LDFLAGS?= -lpthread
RELEASE_CFLAGS?= -O3 -flto $(OPT_FLAGS)
RELEASE_LDFLAGS?= -O3 -flto
DEBUG_CFLAGS?= -DDEBUG -O0 -g
DEBUG_LDFLAGS?= -O0
CFLAGS+= -Wall -pedantic --std=gnu11 $(addprefix -I,$(INCLUDE))
LDFLAGS+=
CFLAGS+= $(FEAT_CFLAGS) -Wall -pedantic --std=gnu11 $(addprefix -I,$(INCLUDE))
LDFLAGS+= $(FEAT_LDFLAGS)
# PGO specific vars
PROF_ITERATIONS=50
PROF_LARGE_BOUND= $$(( 1024 * 1024 * 10 ))
PROF_SMALL_BOUND= $$(( 1024 * 10 ))
PROF_LOCATION?=/tmp/fcmp-pgo
PROF_FLAGS = -fprofile-generate
OBJ = $(addprefix obj/,$(SRC:.c=.o))
PGO_OBJ = $(addprefix prof/,$(SRC:.c=.o))
.PHONY: release
release: | dirs $(PROJECT)-release
@ -22,12 +38,21 @@ release: | dirs $(PROJECT)-release
.PHONY: debug
debug: | dirs $(PROJECT)-debug
.PHONY: pgo
pgo: | dirs $(PROJECT)-pgo
.PHONY: rs
rs: | $(PROJECT)-rs
dirs:
@mkdir -p obj/src
@mkdir -p {obj,prof}/src
obj/%.o: %.c
$(CC) -c $< $(CFLAGS) -o $@ $(LDFLAGS)
prof/%.o: %.c
$(CC) -c $< $(CFLAGS) $(PROF_FLAGS) -o $@ $(LDFLAGS) $(PROF_FLAGS)
$(PROJECT)-release: CFLAGS := $(RELEASE_CFLAGS) $(CFLAGS)
$(PROJECT)-release: LDFLAGS := $(RELEASE_LDFLAGS) $(LDFLAGS)
$(PROJECT)-release: $(OBJ)
@ -39,6 +64,61 @@ $(PROJECT)-debug: LDFLAGS := $(DEBUG_LDFLAGS) $(LDFLAGS)
$(PROJECT)-debug: $(OBJ)
$(CC) $^ $(CFLAGS) -o $@ $(LDFLAGS)
$(PROJECT)-rs:
cd fcmprs && OPT_FLAGS="$(OPT_FLAGS_RUST)" CARGO_FEATURES="$(FEAT_RUST)" $(MAKE)
cp -f ./fcmprs/target/release/fcmprs $@
pgo-generate: CFLAGS := $(RELEASE_CFLAGS) $(CFLAGS)
pgo-generate: LDFLAGS := $(RELEASE_LDFLAGS) $(LDFLAGS)
pgo-generate: $(PGO_OBJ)
$(CC) $^ $(CFLAGS) $(PROF_FLAGS) -o $@ $(LDFLAGS) $(PROF_FLAGS)
pgo-reset:
find ./prof -name \*.gcda -exec rm {} +
pgo-profile: | pgo-generate pgo-reset
#./profile/gen $(PROF_LARGE_BOUND) "$(PROF_LOCATION)/large"
#./profile/gen $(PROF_SMALL_BOUND) "$(PROF_LOCATION)/small"
for i in {1..$(PROF_ITERATIONS)}; do \
rm -rf $(PROF_LOCATION); \
mkdir -p $(PROF_LOCATION)/{large,small}; \
printf "Iteration $$i of $(PROF_ITERATIONS)\r"; \
./profile/gen $(PROF_LARGE_BOUND) "$(PROF_LOCATION)/large" >> /dev/null; \
./profile/gen $(PROF_SMALL_BOUND) "$(PROF_LOCATION)/small" >> /dev/null; \
./pgo-generate $(PROF_LOCATION)/large/matching/* > $(PROF_LOCATION)/stdout & \
./pgo-generate $(PROF_LOCATION)/large/unmatching/* > $(PROF_LOCATION)/stdout & \
./pgo-generate $(PROF_LOCATION)/small/matching/* > $(PROF_LOCATION)/stdout & \
./pgo-generate $(PROF_LOCATION)/small/unmatching/* > $(PROF_LOCATION)/stdout & \
\
./pgo-generate $(PROF_LOCATION)/small/matching/{1,2} > $(PROF_LOCATION)/stdout & \
./pgo-generate $(PROF_LOCATION)/large/matching/{1,2} > $(PROF_LOCATION)/stdout & \
./pgo-generate $(PROF_LOCATION)/small/unmatching/{1,2} > $(PROF_LOCATION)/stdout & \
./pgo-generate $(PROF_LOCATION)/large/unmatching/{1,2} > $(PROF_LOCATION)/stdout & \
wait; \
rm -rf $(PROF_LOCATION)/{large,small}; \
done
@echo ""
rm -rf $(PROF_LOCATION)
rm pgo-generate
pgo-use: CFLAGS := $(RELEASE_CFLAGS) $(CFLAGS)
pgo-use: LDFLAGS := $(RELEASE_LDFLAGS) $(LDFLAGS)
pgo-use: PROF_FLAGS = -fprofile-use -fprofile-correction
pgo-use: $(PGO_OBJ)
$(CC) $^ $(CFLAGS) $(PROF_FLAGS) -o $@ $(LDFLAGS) $(PROF_FLAGS)
$(PROJECT)-pgo: CFLAGS := $(RELEASE_CFLAGS) $(CFLAGS)
$(PROJECT)-pgo: LDFLAGS := $(RELEASE_LDFLAGS) $(LDFLAGS)
$(PROJECT)-pgo: pgo-profile
find ./prof -name \*.o -exec rm {} +
$(MAKE) pgo-use
mv pgo-use $@
strip $@
clean:
rm -rf obj
rm -f $(PROJECT)-{release,debug}
cd fcmprs && make clean
rm -rf {obj,prof}
rm -f $(PROJECT)-{release,debug,pgo,rs}

@ -38,13 +38,24 @@ Build with default optimisations using `make release`, it will output a stripped
### Notes
* The Makefile uses the variables `RELEASE_CFLAGS` and `RELEASE_LDFLAGS` to apply optimisations (and `DEBUG_CFLAGS` + `DEBUG_LDFLAGS` for extra compiler flags with the debug target). If needed, you can set these yourself to override the defaults.
* The default `RELEASE_CFLAGS` specify `-march=native`, which may be undesirable for you. Set the variable or modify the Makefile if you need to remove this.
* The default `OPT_FLAGS` specify `-march=native`, which may be undesirable for you. Set the variable or modify the Makefile if you need to remove this (see the example below).
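For example, the defaults can be overridden directly on the `make` command line. This is only a sketch; the replacement flag values are illustrative:

```sh
# Drop -march=native (and the other tuning flags) by clearing OPT_FLAGS...
make release OPT_FLAGS=""

# ...or supply a reduced set of optimisation flags instead.
make release OPT_FLAGS="-fopenmp -ftree-parallelize-loops=4"
```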
## PGO
Building with Profile-Guided Optimisation is supported via the `pgo` Makefile target. It uses the same rules as the `release` target and outputs a binary to `fcmp-pgo`.
There may be small performance improvements from using this target instead of `release`, but the difference is mostly negligible.
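A rough sketch of building and trying the PGO binary (the two input files here are placeholders):

```sh
make clean
make pgo                          # runs the profiling loop, outputs ./fcmp-pgo

# fcmp reports the comparison result through its exit status.
./fcmp-pgo file_a file_b && echo "files match"
```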
## Debug target
Build with debugging information and no optimisations using `make debug`; it will output a binary at `fcmp-debug`.
## Note
Before switching between `release` and `debug` targets, make sure to run `make clean`.
## Notes
- Before switching between targets, make sure to run `make clean`.
- GCC- and Graphite-specific optimisation flags are added by default via the `OPT_FLAGS` variable. Override this variable if you are using another compiler that doesn't support these optimisations (see the example below).
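For example, building with a compiler that does not implement the Graphite passes could look something like the following; the `clang` invocation and the replacement flags are purely illustrative:

```sh
# Clang does not support -fgraphite or the -floop-* flags, so pass a plainer OPT_FLAGS.
make clean
make release CC=clang OPT_FLAGS="-march=native"
```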
### Multithreading
- By default, parallel processing via `libpthread` is enabled when building; to build a single-threaded version, override the variables `FEAT_CFLAGS` and `FEAT_LDFLAGS` to be empty.
- By default the program decides at runtime whether or not to use parallelised processing. You can set `FEAT_CFLAGS="-D_RUN_THREADED=1"` to _force_ a parallelised run every time, although this is not recommended.
- Performance gains from parallelised runs mostly appear when a large number of files are compared at once, as only then does the work outweigh the task-delegation overhead (see the build examples below).
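As a concrete sketch, the build variants described above map onto `make` invocations like these (only variables already defined in the Makefiles are used):

```sh
# Default: threaded build; the binary decides at runtime (-D_RUN_THREADED=0, links -lpthread).
make release

# Single-threaded build: clear the feature flags.
make release FEAT_CFLAGS= FEAT_LDFLAGS=

# Force a parallelised run on every invocation (not recommended).
make release FEAT_CFLAGS="-D_RUN_THREADED=1"

# Rust port without the rayon-based threading feature.
make rs FEAT_RUST=
```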
# License
GPL'd with <3

@ -0,0 +1,25 @@
[package]
name = "fcmprs"
version = "0.1.0"
authors = ["Avril <flanchan@cumallover.me>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = ["threads"]
threads = ["rayon"]
[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
panic = "abort"
[dependencies]
cfg-if = "1.0.0"
memmap = "0.7.0"
once_cell = "1.5.2"
rayon = {version = "1.5.0", optional = true}
smallvec = "1.5.0"

@ -0,0 +1,76 @@
PROJECT = fcmprs
CARGO_FEATURES?= threads
OPT_FLAGS?= -C target-cpu=native
RUSTFLAGS?=
PROF_DIR=/tmp/fcmprs/prof
PROF_ITERATIONS?=100
PROF_LOCATION?=/tmp/fcmprs-profiters
PROF_LARGE_BOUND= $$(( 1024 * 1024 * 10 ))
PROF_SMALL_BOUND= $$(( 1024 * 10 ))
.PHONY: release
release: target/release/$(PROJECT)
.PHONY: debug
debug: target/debug/$(PROJECT)
.PHONY: pgo
pgo: target/release/$(PROJECT)-pgo
# Currently broken.
target/release/$(PROJECT): RUSTFLAGS+= $(OPT_FLAGS)
target/release/$(PROJECT):
RUSTFLAGS="$(RUSTFLAGS)" cargo build --release --no-default-features $(addprefix --features ,$(CARGO_FEATURES))
strip $@
target/debug/$(PROJECT):
RUSTFLAGS="$(RUSTFLAGS)" cargo build --no-default-features $(addprefix --features ,$(CARGO_FEATURES))
pgo-generate: RUSTFLAGS+= -Cprofile-generate=$(PROF_DIR)
pgo-generate: target/release/$(PROJECT)
pgo-profile: pgo-generate
for i in {1..$(PROF_ITERATIONS)}; do \
rm -rf $(PROF_LOCATION); \
mkdir -p $(PROF_LOCATION)/{large,small}; \
printf "Iteration $$i of $(PROF_ITERATIONS)\r"; \
../profile/gen $(PROF_LARGE_BOUND) "$(PROF_LOCATION)/large" >> /dev/null; \
../profile/gen $(PROF_SMALL_BOUND) "$(PROF_LOCATION)/small" >> /dev/null; \
./target/release/fcmprs $(PROF_LOCATION)/large/matching/* > $(PROF_LOCATION)/stdout & \
./target/release/fcmprs $(PROF_LOCATION)/large/unmatching/* > $(PROF_LOCATION)/stdout & \
./target/release/fcmprs $(PROF_LOCATION)/small/matching/* > $(PROF_LOCATION)/stdout & \
./target/release/fcmprs $(PROF_LOCATION)/small/unmatching/* > $(PROF_LOCATION)/stdout & \
\
./target/release/fcmprs $(PROF_LOCATION)/small/matching/{1,2} > $(PROF_LOCATION)/stdout & \
./target/release/fcmprs $(PROF_LOCATION)/large/matching/{1,2} > $(PROF_LOCATION)/stdout & \
./target/release/fcmprs $(PROF_LOCATION)/small/unmatching/{1,2} > $(PROF_LOCATION)/stdout & \
./target/release/fcmprs $(PROF_LOCATION)/large/unmatching/{1,2} > $(PROF_LOCATION)/stdout & \
wait; \
rm -rf $(PROF_LOCATION)/{large,small}; \
done
@echo ""
rm -rf $(PROF_LOCATION)
llvm-profdata merge -o $(PROF_DIR)/merged.profdata $(PROF_DIR)
pgo-use: RUSTFLAGS+= -Cprofile-use=$(PROF_DIR)/merged.profdata -Cllvm-args=-pgo-warn-missing-function
pgo-use: target/release/$(PROJECT)
pgo-reset:
rm -rf $(PROF_DIR)
mkdir -p $(PROF_DIR)
target/release/$(PROJECT)-pgo: | pgo-reset pgo-profile
#rm -rf target
CARGO_INCREMENTAL=0 $(MAKE) -B pgo-use
mv -f target/release/$(PROJECT) $@
clean:
rm -rf target
rm -rf $(PROF_LOCATION) $(PROF_DIR)

@ -0,0 +1,42 @@
use std::{fmt,error};
#[derive(Debug)]
/// There was a non-matching file
pub enum UnmatchError
{
Size,
Data,
Unknown,
}
impl error::Error for UnmatchError{}
impl fmt::Display for UnmatchError
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
{
match self {
Self::Size => write!(f, "size differs"),
Self::Data => write!(f, "data differs"),
_ => write!(f, "unknown error"),
}
}
}
pub trait ResultPrintExt<T>
{
fn discard_msg(self, msg: impl AsRef<str>) -> Option<T>;
}
impl<T, E> ResultPrintExt<T> for Result<T,E>
where E: std::fmt::Display
{
fn discard_msg(self, msg: impl AsRef<str>) -> Option<T> {
match self {
Ok(v) => Some(v),
Err(e) => {
eprintln!("{}: {}", msg.as_ref(), e);
None
},
}
}
}

@ -0,0 +1,105 @@
#![allow(dead_code)]
#[cfg(feature="threads")] use rayon::prelude::*;
#[allow(unused_imports)]
use std::{
path::Path,
io, fs::{self, OpenOptions,},
convert::TryInto,
};
use smallvec::SmallVec;
use cfg_if::cfg_if;
fn usage() -> !
{
eprintln!("fcmprs: Compare files for identity");
eprintln!("Usage: {} <files...>", std::env::args().next().unwrap());
std::process::exit(-1)
}
mod error;
use error::ResultPrintExt as _;
mod map;
use map::MappedFile as _;
use error::UnmatchError;
fn main() {
let (map1, rest) = {
let mut args = std::env::args().skip(1);
if let Some(one) = args.next() {
(one, args)
} else {
usage();
}
};
std::process::exit({
if let Some(map1) = map::map(&map1).discard_msg(format!("Failed to map file {}", map1)) {
let slice = map1.as_slice();
#[cfg(feature="threads")] let map1_sz: u64 = slice.len().try_into().expect("File size could not fit into u64. This should never happen."); // For now, non-threaded mode doesn't use this.
let mut ok = true;
let chk: SmallVec<[_; 32]> = rest.filter_map(|filename| {
let path = Path::new(&filename);
if path.exists() && path.is_file() {
map::map(path).discard_msg(format!("Failed to map file {}", filename))
} else {
eprintln!("File {} does not exist or is not a normal file", filename);
ok=false;
None
}
}).collect();
if !ok {
-1
} else {
cfg_if! {
if #[cfg(feature="threads")] {
match chk.into_par_iter()
.map(|map| {
if let Ok(stat) = map.as_file().metadata() {
if stat.len() != map1_sz {
return Err(UnmatchError::Size);
}
if !stat.is_file() {
return Err(UnmatchError::Unknown);
}
}
if slice == map.as_slice() {
Ok(())
} else {
Err(UnmatchError::Data)
}
})
.try_reduce_with(|_, _| Ok(()))
{
Some(Ok(_)) => 0,
Some(Err(UnmatchError::Data)) => 1,
Some(Err(UnmatchError::Size)) => 2,
None => usage(),
_ => -1,
}
} else {
match chk.into_iter()
.map(|map| {
slice == map.as_slice()
})
.try_fold((false, true), |(_, a), b| if a && b {Ok((true, true))} else {Err(UnmatchError::Data)})
{
Ok((true, _)) => 0,
Ok((false, _)) => usage(),
Err(_) => 1,
}
}
}
}
} else {
-1
}
})
}

@ -0,0 +1,135 @@
use super::*;
use once_cell::sync::OnceCell;
pub trait MappedFile
{
#[inline] fn as_slice(&self) -> &[u8]
{
&self.as_map()[..]
}
fn as_map(&self) -> &memmap::Mmap;
fn as_file(&self) -> &fs::File;
}
pub trait MappedFileNew: MappedFile + Sized
{
fn try_map(file: fs::File) -> io::Result<Self>;
#[inline] fn map(file: fs::File) -> Self
{
Self::try_map(file).unwrap()
}
}
/// Represents an open and memory mapped file
#[derive(Debug)]
pub struct MemMap
{
map: memmap::Mmap,
file: fs::File,
}
impl MappedFile for MemMap
{
/// Get the memory mapped portion as a slice
fn as_slice(&self) -> &[u8] {
&self.map[..]
}
fn as_map(&self) -> &memmap::Mmap {
&self.map
}
#[inline] fn as_file(&self) -> &fs::File {
&self.file
}
}
impl MappedFileNew for MemMap
{
#[inline] fn try_map(file: fs::File) -> io::Result<Self>
{
Ok(MemMap {
map: unsafe { memmap::Mmap::map(&file)? },
file,
})
}
}
/// Attempt to map this file
pub fn map_with<M: MappedFileNew>(file: &Path) -> io::Result<M>
{
let file = OpenOptions::new()
.read(true)
.open(file)?;
M::try_map(file)
}
/// Type container for memory map
pub type DefaultMapType = LazyMap;
/// Attempt to map this file to the `DefaultMapType`
pub fn map(file: impl AsRef<Path>) -> io::Result<DefaultMapType>
{
map_with(file.as_ref())
}
/// An open and maybe mapped file
#[derive(Debug)]
pub struct LazyMap
{
map: OnceCell<memmap::Mmap>,
file: fs::File,
}
impl LazyMap
{
#[inline(always)] fn get_map(&self) -> &memmap::Mmap
{
self.map.get_or_init(|| unsafe {memmap::Mmap::map(&self.file).expect("Lazy map failed")})
}
#[inline(always)] fn try_get_map(&self) -> io::Result<&memmap::Mmap>
{
self.map.get_or_try_init(|| unsafe {memmap::Mmap::map(&self.file)})
}
/// Is the memory mapped already?
#[inline] pub fn is_mapped(&self) -> bool
{
self.map.get().is_some()
}
/// Get the mapped portion if it is mapped, attempting a map if not
#[inline] pub fn try_as_slice(&self) -> io::Result<&[u8]>
{
Ok(&self.try_get_map()?[..])
}
}
impl MappedFile for LazyMap
{
/// Get the memory mapped portion as a slice
///
/// Returns blank slice if mapping fails
#[inline] fn as_slice(&self) -> &[u8]
{
self.try_get_map()
.map(|x| &x[..])
.unwrap_or(&[])
}
fn as_map(&self) -> &memmap::Mmap {
self.map.get().unwrap()
}
#[inline] fn as_file(&self) -> &fs::File {
&self.file
}
}
impl MappedFileNew for LazyMap
{
#[inline] fn try_map(file: fs::File) -> io::Result<Self>
{
Ok(LazyMap {
map: OnceCell::new(),
file,
})
}
}

@ -0,0 +1,27 @@
#ifndef _FCMP_H
#define _FCMP_H
#ifdef DEBUG
#define _FORCE_INLINE static inline __attribute__((gnu_inline))
#else
#define _FORCE_INLINE extern inline __attribute__((gnu_inline))
#endif
#define _ALIAS __attribute__((may_alias))
#ifdef DEBUG
#define __name(d) #d
#define dprintf(fmt, ...) printf("[dbg @" __FILE__ "->%s:%d] " fmt "\n", __func__, __LINE__, ## __VA_ARGS__)
#else
#define dprintf(fmt, ...)
#endif
/// Enable threaded scheduling
// Set to 1 to FORCE threaded scheduling, 0 to use when opportune.
//
//#define _RUN_THREADED 0
extern const char* _prog_name;
#endif /* _FCMP_H */

@ -17,6 +17,10 @@ typedef struct mmap {
int open_and_map(const char* file, mmap_t* restrict ptr);
int unmap_and_close(mmap_t map);
int set_preload_map(mmap_t* restrict map);
/// Undo a previous call to `set_preload_map()`.
/// *freeing* (1 = yes, 0 = no) - If the next operation on the map will be `unmap_and_close()`, advise the kernel that it may drop the mapped pages whenever it wants. Do not set this if you will use the map again after this call.
int unset_preload_map(mmap_t* restrict map, int freeing);
#ifdef _cplusplus
}

@ -0,0 +1,22 @@
#ifndef _SCHED_H
#define _SCHED_H
#include <vector.h>
#include <fcmp.h>
#include <pthread.h> /* for pthread_t in tasklist_t */
typedef struct tasklist {
size_t argc;
struct taskarg* argv;
pthread_t* tasks;
} tasklist_t;
typedef void (*sched_cb)(vec_t* restrict tasklist);
#ifdef _RUN_THREADED
bool sched_spawn(vec_t full, sched_cb callback, struct tasklist *restrict t_list);
void sched_wait(struct tasklist* restrict t_list);
bool sched_should(size_t ntasks);
#endif
#endif /* _SCHED_H */

@ -0,0 +1,28 @@
#ifndef _VECTOR_H
#define _VECTOR_H
#include <stdlib.h>
#include <stdbool.h>
#include "fcmp.h"
typedef struct {
size_t len, cap;
size_t element, scap;
void* ptr;
} vec_t;
#define VEC_DEFAULT_CAP 16
vec_t vec_new_with_cap(size_t elem, size_t cap);
void vec_push(vec_t* restrict self, const void* restrict item);
bool vec_pop(vec_t* restrict self, void* restrict item);
void* vec_index(const vec_t* restrict self, size_t i);
vec_t vec_clone(const vec_t* restrict self);
_FORCE_INLINE vec_t vec_new(size_t elem) { return vec_new_with_cap(elem, VEC_DEFAULT_CAP); }
_FORCE_INLINE void vec_free(vec_t v) { free(v.ptr); }
#endif /* _VECTOR_H */

@ -0,0 +1,33 @@
#!/bin/bash
# Generate sets of matching and unmatching test files ($1 bytes each) under $2
bound=$1
ITERATIONS=${ITERATIONS:-100}
BREAK_AT=50
cd $2 || exit 1
echo ">>> Generating ${ITERATIONS} matching files at $2/matching"
mkdir matching
dd if=/dev/urandom of=./matching/0 bs=$bound count=1 >> /dev/null 2>&1 || exit 1
pushd matching >>/dev/null
for i in $(seq 1 ${ITERATIONS}); do
cp -f 0 $i || exit 1
done
popd >>/dev/null
echo ">>> Generatig ${ITERATIONS} with unmatching files at $2/unmatching"
mkdir unmatching
dd if=/dev/urandom of=./unmatching/0 bs=$bound count=1 >> /dev/null 2>&1 || exit 1
pushd unmatching >> /dev/null
for i in $(seq 1 ${ITERATIONS}); do
if [[ $i == ${BREAK_AT} ]]; then
echo " $i < unmatching"
dd if=/dev/urandom of=$i bs=$bound count=1 >>/dev/null 2>&1 || exit
else
cp -f 0 $i || exit 1
fi
done
popd >> /dev/null
echo ">>> Generated in $2"

@ -2,30 +2,42 @@
#include <stdio.h>
#include <string.h>
#include <map.h>
static const char* _prog_name = "fcmp";
#include <fcmp.h>
#ifdef DEBUG
#define __name(d) #d
#define dprintf(fmt, ...) printf("[dbg @" __FILE__ "->%s:%d] " fmt "\n", __func__, __LINE__ __VA_OPT__(,) __VA_ARGS__)
#else
#define dprintf(fmt, ...)
#include <map.h>
#include <vector.h>
#ifdef _RUN_THREADED
#include <sched.h>
#endif
__attribute__((noreturn)) static void usage()
const char* _prog_name = "fcmp";
__attribute__((noreturn, noinline)) void usage()
{
fprintf(stderr, "fcmp: compare files for identity\n");
fprintf(stderr, "usage: %s <files...>\n", _prog_name);
exit(-1);
}
__attribute__((always_inline)) static inline const void* die_if_null(const void* ptr)
_FORCE_INLINE const void* die_with_usage_if_null(const void* ptr)
{
if (!ptr) usage();
else return ptr;
}
static void unprep_map(mmap_t* restrict map, bool free)
{
//TODO: Should we actually bother to call this before unmapping?
if(!unset_preload_map(map, (int)free))
fprintf(stderr, "Error: failed to unprep map %p (%d)%s, continuing anyway\n",
map->ptr, map->fd,
(free ? " before closing" : ""));
else dprintf("unprep'd %p (%d)", map->ptr, map->fd);
}
static int unmap_all(mmap_t ptrs[], size_t len)
{
register int rval=1;
@ -40,6 +52,7 @@ static int unmap_all(mmap_t ptrs[], size_t len)
return rval;
}
static int compare_then_close(const mmap_t * restrict map1, mmap_t map2)
{
register int rval=0;
@ -55,6 +68,74 @@ static int compare_then_close(const mmap_t * restrict map1, mmap_t map2)
return rval;
}
static void prep_map(mmap_t* restrict map)
{
if(!set_preload_map(map))
fprintf(stderr, "Error: failed to prep map %p (%d), continuing anyway\n", map->ptr, map->fd);
else dprintf("prep'd %p (%d)", map->ptr, map->fd);
}
#ifdef _RUN_THREADED
struct t_task {
_Atomic int* _ALIAS othis;
_Atomic bool* _ALIAS ocontinue;
int ithis;
const char* fthis;
mmap_t mthis;
const mmap_t* map1;
};
void proc_thread(vec_t* restrict v_tasks)
{
struct t_task * tasks = v_tasks->ptr;
mmap_t mrest[v_tasks->len];
#ifdef DEBUG
const char* frest[v_tasks->len];
#endif
int nrest = v_tasks->len;
const mmap_t* map1;
{
for(register int i=0;i<v_tasks->len;i++)
{
// Copy map into local buffer
mrest[i] = tasks[i].mthis;
// Prep this map
prep_map(&mrest[i]);
#ifdef DEBUG
frest[i] = tasks[i].fthis;
#endif
}
map1 = tasks[0].map1;
}
register int rval=0;
for(register int i=0;i<nrest;i++)
{
if (! *tasks[0].ocontinue) {
dprintf("Signalled to drop rest of tasks");
unmap_all(mrest+i, nrest-i);
break;
}
dprintf("Checking %d \"%s\"", tasks[i].ithis, frest[i]);
switch ((rval=compare_then_close(map1, mrest[i]))) {
case 0: break;
default:
// Close the rest
dprintf("Unmapping mrest from %d (len %d) while max of nrest is %d", (i+1), nrest-(i+1), nrest);
if(i<nrest-1) unmap_all(mrest+ (i+1), nrest- (i+1));
*tasks[0].ocontinue = false;
goto end;
}
dprintf("Ident %d OK", tasks[i].ithis);
}
end:
*tasks[0].othis = rval;
}
#endif
int main(int argc, char** argv)
{
_prog_name = argv[0];
@ -62,11 +143,11 @@ int main(int argc, char** argv)
const int nrest = argc-2;
if (nrest==0) usage();
dprintf("There are %d extra files to chk", nrest);
const char* f1 = die_if_null(argv[1]);
const char* f1 = die_with_usage_if_null(argv[1]);
const char* frest[nrest];
for (register int i=0;i<nrest;i++) {
frest[i] = die_if_null(argv[2+i]);
frest[i] = die_with_usage_if_null(argv[2+i]);
dprintf("frest[%d] = \"%s\"", i, frest[i]);
}
@ -78,6 +159,9 @@ int main(int argc, char** argv)
return -1;
}
// Prep map 1
prep_map(&map1);
for(register int i=0;i<nrest;i++) {
const char* f2 = frest[i];
dprintf("Attempting to map %d (%s)", i, f2);
@ -86,11 +170,61 @@ int main(int argc, char** argv)
unmap_and_close(map1);
unmap_all(mrest, i);
return -1;
}
}
#ifdef _RUN_THREADED
// Prep the new map immediately if single threaded.
else if(! (sched_should(nrest) || _RUN_THREADED)) prep_map(&mrest[i]);
#endif
}
dprintf("All map okay");
register int rval=0;
#ifdef _RUN_THREADED
if(sched_should(nrest) || _RUN_THREADED) {
dprintf("Running multi-threaded");
_Atomic int rvals[nrest];
_Atomic bool sync_cont = true;
vec_t vtask_args = vec_new_with_cap(sizeof(struct t_task), nrest);
struct t_task* task_args = vtask_args.ptr;
for (int i=0;i<nrest;i++) {
// Set default return value for task (0).
rvals[i] = 0;
// Set task params
task_args[i] = (struct t_task){
.ithis = i,
.fthis = frest[i],
.mthis = mrest[i],
.map1 = &map1,
.othis = &rvals[i],
.ocontinue = &sync_cont,
};
}
vtask_args.len = (size_t)nrest;
tasklist_t threads;
if(!sched_spawn(vtask_args, &proc_thread, &threads)) {
fprintf(stderr, "Failed to spawn tasks\n");
abort(); //no clear way to exit gracefully from this...
}
vec_free(vtask_args);
dprintf("Children spawned");
sched_wait(&threads);
// Waited, we can now unprep map1
unprep_map(&map1, true);
for (register int i=0;i<nrest;i++) {
if(rvals[i]) {
rval = rvals[i];
break;
}
}
goto end;
} else {
#endif
dprintf("Running single threaded");
for(register int i=0;i<nrest;i++) {
dprintf("Checking %d \"%s\"", i, frest[i]);
switch ((rval=compare_then_close(&map1, mrest[i]))) {
@ -103,13 +237,17 @@ int main(int argc, char** argv)
}
dprintf("Ident %d OK", i);
}
#ifdef _RUN_THREADED
}
#endif
end:
dprintf("Unmapping `map1`");
if(!unmap_and_close(map1)) {
fprintf(stderr, "Failed to unmap and close %s", f1);
rval=-1;
}
dprintf("Final rval is %d", rval);
return rval;
}

@ -10,8 +10,37 @@
#define FILEMODE S_IRWXU | S_IRGRP | S_IROTH
#define DEFAULT_ADVICE MADV_SEQUENTIAL
#define ADVICE DEFAULT_ADVICE | MADV_WILLNEED
#define UNADVICE MADV_NORMAL | MADV_COLD
#include <map.h>
static inline int _map_advise(const mmap_t* restrict map, int adv)
{
return madvise(map->ptr, map->len, adv);
}
int unset_preload_map(mmap_t* restrict map, int freeing)
{
if(_map_advise(map, UNADVICE | (freeing ? MADV_DONTNEED : 0)) != 0) {
perror("failed to advise kernel to drop mapped page(s)");
return 0;
}
return 1;
}
int set_preload_map(mmap_t* restrict map)
{
if(_map_advise(map, ADVICE) != 0) {
perror("failed to advise kernel to preload mapped page(s)");
return 0;
}
return 1;
}
int open_and_map(const char* file, mmap_t* restrict ptr)
{
int fd;
@ -27,7 +56,7 @@ int open_and_map(const char* file, mmap_t* restrict ptr)
return 0;
}
register struct mmap map = { .fd = fd, .ptr = NULL, .len = st.st_size };
struct mmap map = { .fd = fd, .ptr = NULL, .len = st.st_size };
if ((map.ptr = mmap(NULL, map.len, PROT_READ, MAP_SHARED,fd, 0)) == MAP_FAILED) {
perror("mmap() failed");
@ -35,8 +64,15 @@ int open_and_map(const char* file, mmap_t* restrict ptr)
return 0;
}
if(_map_advise(&map, DEFAULT_ADVICE) != 0) {
perror("madvise(): failed to set default advice");
//XXX: Should this be a hard error, or should we return the map if this fails anyway?
unmap_and_close(map);
return 0;
}
*ptr = map;
return 1;
}

@ -0,0 +1,86 @@
// Scheduler
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <vector.h>
#include <pthread.h>
#include <stdlib.h>
/* Project header (found via -Iinclude): declares sched_cb, struct tasklist and the sched_* functions. */
#include <sched.h>
#ifdef _RUN_THREADED
inline static size_t num_cpus() {
return sysconf( _SC_NPROCESSORS_ONLN );
}
struct taskarg {
vec_t li;
sched_cb cb;
};
static void* _spawn(void* _arg)
{
struct taskarg* restrict arg = _arg;
if(arg->li.len>0)
arg->cb(&arg->li);
vec_free(arg->li);
return NULL;
}
bool sched_should(size_t ntasks)
{
static size_t num = 0;
// XXX: This is not thread-safe, but this function is only ever called by the main thread, so...
if(!num) num = num_cpus();
return (num > 1 && ntasks > 1);
}
bool sched_spawn(vec_t full, sched_cb callback, struct tasklist *restrict t_list)
{
register size_t spn = num_cpus() + 1;
if (spn > full.len) spn = full.len;
dprintf("Spawning %lu worker threads", spn);
// Split tasks
*t_list = (struct tasklist){
.argc = spn,
.argv = calloc(sizeof(struct taskarg), spn),
.tasks = calloc(sizeof(pthread_t), spn),
};
struct taskarg* tasklist = t_list->argv;
for(register int i=0;i<spn;i++) tasklist[i] = (struct taskarg){.li = vec_new_with_cap(full.element, full.len), .cb = callback };
for (register int i=0;i<full.len;i++)
{
vec_push(&tasklist[i%spn].li, vec_index(&full, i));
}
for(register int i=0;i<spn;i++)
{
if(pthread_create(&t_list->tasks[i], NULL, &_spawn, &tasklist[i]))
{
perror("Failed to spawn thread");
return false;
}
dprintf("Worker thead %d of %lu OK", i, spn);
}
return true;
}
void sched_wait(struct tasklist* restrict t_list)
{
dprintf("Waiting on %lu worker threads", t_list->argc);
for (size_t i=0;i<t_list->argc;i++) {
if(pthread_join(t_list->tasks[i], NULL)) {
perror("Failed to join thread");
continue;
}
dprintf("Joined thread %lu of %lu okay", i, t_list->argc);
}
free(t_list->tasks);
free(t_list->argv);
dprintf("Freed args and thread handles okay");
}
#endif

@ -0,0 +1,61 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <vector.h>
_FORCE_INLINE void* die_if_null(void* ptr)
{
if (!ptr) abort();
return ptr;
}
vec_t vec_new_with_cap(size_t elem, size_t cap) {
return (vec_t){
.len =0,
.cap = cap,
.scap = cap,
.element = elem,
.ptr = die_if_null(calloc(elem, cap)),
};
}
static inline void vec_extend_one(vec_t* restrict self)
{
self->ptr = die_if_null(reallocarray(self->ptr, self->element, (self->cap+=self->scap)));
}
void* vec_index(const vec_t* restrict self, size_t i)
{
if (i >= self->len) return NULL;
return (void*)(((uintptr_t)self->ptr)+ (self->element*i));
}
void vec_push(vec_t* restrict self, const void* restrict item)
{
if (self->len >= self->cap) {
vec_extend_one(self);
}
memcpy(die_if_null(vec_index(self, self->len++)), item, self->element);
}
bool vec_pop(vec_t* restrict self, void* restrict item)
{
if (self->len>0) {
memcpy(item, die_if_null(vec_index(self, --self->len)), self->element);
return true;
} else return false;
}
vec_t vec_clone(const vec_t* restrict self)
{
register vec_t new = {
.len = self->len,
.cap = self->cap,
.element = self->element,
.scap = self->scap,
.ptr = die_if_null(calloc(self->element, self->cap)),
};
memcpy(new.ptr, self->ptr, new.len * new.element);
return new;
}