Compare commits
No commits in common. 'master' and 'threaded' have entirely different histories.
@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "fcmprs"
|
||||
version = "0.1.0"
|
||||
authors = ["Avril <flanchan@cumallover.me>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[features]
|
||||
default = ["threads"]
|
||||
|
||||
threads = ["rayon"]
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
lto = "fat"
|
||||
codegen-units = 1
|
||||
panic = "abort"
|
||||
|
||||
[dependencies]
|
||||
cfg-if = "1.0.0"
|
||||
memmap = "0.7.0"
|
||||
once_cell = "1.5.2"
|
||||
rayon = {version = "1.5.0", optional = true}
|
||||
smallvec = "1.5.0"
|
@ -1,76 +0,0 @@
|
||||
|
||||
# Build, profile and clean targets for the fcmprs binary.

PROJECT = fcmprs

# Cargo features to enable (the crate's defaults are always disabled by the build recipes).
CARGO_FEATURES?= threads

# Extra rustc flags applied to release builds only.
OPT_FLAGS?= -C target-cpu=native
RUSTFLAGS?=

# Where raw and merged PGO profile data is stored.
PROF_DIR=/tmp/fcmprs/prof
# How many profiling rounds pgo-profile runs.
PROF_ITERATIONS?=100
# Scratch directory holding the generated sample files for each round.
PROF_LOCATION?=/tmp/fcmprs-profiters

# Sample file sizes in bytes; the arithmetic is evaluated by the shell inside the recipe.
PROF_LARGE_BOUND= $$(( 1024 * 1024 * 10 ))
PROF_SMALL_BOUND= $$(( 1024 * 10 ))

# The pgo-profile recipe uses brace expansion ({1..N}, {a,b}), which is a bash
# feature and is not provided by every /bin/sh.
SHELL := /bin/bash

.PHONY: release
release: target/release/$(PROJECT)

.PHONY: debug
debug: target/debug/$(PROJECT)

.PHONY: pgo
pgo: target/release/$(PROJECT)-pgo
# Currently broken.

# These targets never produce a file of their own name.
.PHONY: pgo-generate pgo-profile pgo-use pgo-reset clean

target/release/$(PROJECT): RUSTFLAGS+= $(OPT_FLAGS)
target/release/$(PROJECT):
	RUSTFLAGS="$(RUSTFLAGS)" cargo build --release --no-default-features $(addprefix --features ,$(CARGO_FEATURES))
	strip $@

target/debug/$(PROJECT):
	RUSTFLAGS="$(RUSTFLAGS)" cargo build --no-default-features $(addprefix --features ,$(CARGO_FEATURES))

pgo-generate: RUSTFLAGS+= -Cprofile-generate=$(PROF_DIR)
pgo-generate: target/release/$(PROJECT)

# Run the instrumented binary over freshly generated sample files, then merge the profiles.
# Each comparison runs in the background ("cmd > file &") and `wait` joins them all;
# the redirection must come before the `&`, otherwise it applies to an empty command.
pgo-profile: pgo-generate
	for i in {1..$(PROF_ITERATIONS)}; do \
		rm -rf $(PROF_LOCATION); \
		mkdir -p $(PROF_LOCATION)/{large,small}; \
		printf "Iteration $$i of $(PROF_ITERATIONS)\r"; \
		../profile/gen $(PROF_LARGE_BOUND) "$(PROF_LOCATION)/large" >> /dev/null; \
		../profile/gen $(PROF_SMALL_BOUND) "$(PROF_LOCATION)/small" >> /dev/null; \
		./target/release/fcmprs $(PROF_LOCATION)/large/matching/* > $(PROF_LOCATION)/stdout & \
		./target/release/fcmprs $(PROF_LOCATION)/large/unmatching/* > $(PROF_LOCATION)/stdout & \
		./target/release/fcmprs $(PROF_LOCATION)/small/matching/* > $(PROF_LOCATION)/stdout & \
		./target/release/fcmprs $(PROF_LOCATION)/small/unmatching/* > $(PROF_LOCATION)/stdout & \
		\
		./target/release/fcmprs $(PROF_LOCATION)/small/matching/{1,2} > $(PROF_LOCATION)/stdout & \
		./target/release/fcmprs $(PROF_LOCATION)/large/matching/{1,2} > $(PROF_LOCATION)/stdout & \
		./target/release/fcmprs $(PROF_LOCATION)/small/unmatching/{1,2} > $(PROF_LOCATION)/stdout & \
		./target/release/fcmprs $(PROF_LOCATION)/large/unmatching/{1,2} > $(PROF_LOCATION)/stdout & \
		wait; \
		rm -rf $(PROF_LOCATION)/{large,small}; \
	done
	@echo ""
	rm -rf $(PROF_LOCATION)
	llvm-profdata merge -o $(PROF_DIR)/merged.profdata $(PROF_DIR)

pgo-use: RUSTFLAGS+= -Cprofile-use=$(PROF_DIR)/merged.profdata -Cllvm-args=-pgo-warn-missing-function
pgo-use: target/release/$(PROJECT)

pgo-reset:
	rm -rf $(PROF_DIR)
	mkdir -p $(PROF_DIR)

# Order-only prerequisites: reset the profile directory, collect profiles, then
# force a rebuild that consumes the merged profile.
target/release/$(PROJECT)-pgo: | pgo-reset pgo-profile
	#rm -rf target
	CARGO_INCREMENTAL=0 $(MAKE) -B pgo-use
	mv -f target/release/$(PROJECT) $@

clean:
	rm -rf target
	rm -rf $(PROF_LOCATION) $(PROF_DIR)
|
@ -1,42 +0,0 @@
|
||||
use std::{fmt,error};
|
||||
|
||||
/// There was a non-matching file
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnmatchError
{
    /// The sizes differ (displayed as "size differs").
    Size,
    /// The contents differ (displayed as "data differs").
    Data,
    /// Any other failure (displayed as "unknown error").
    Unknown,
}

impl error::Error for UnmatchError {}

impl fmt::Display for UnmatchError
{
    /// Write the short, human readable description of this mismatch.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result
    {
        // Deliberately exhaustive (no `_` arm): adding a variant must force a
        // decision about its message instead of silently printing "unknown error".
        f.write_str(match self {
            Self::Size => "size differs",
            Self::Data => "data differs",
            Self::Unknown => "unknown error",
        })
    }
}
|
||||
|
||||
/// Extension trait that turns a fallible value into an `Option`, reporting the
/// failure on stderr instead of returning it.
pub trait ResultPrintExt<T>
{
    /// Return `Some(value)` on success. On failure, print `"<msg>: <error>"` to
    /// stderr and return `None`.
    fn discard_msg(self, msg: impl AsRef<str>) -> Option<T>;
}

impl<T, E> ResultPrintExt<T> for Result<T, E>
where
    E: std::fmt::Display,
{
    fn discard_msg(self, msg: impl AsRef<str>) -> Option<T> {
        // Report the error (if any) as a side effect, then drop it.
        self.map_err(|err| eprintln!("{}: {}", msg.as_ref(), err))
            .ok()
    }
}
|
@ -1,105 +0,0 @@
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
#[cfg(feature="threads")] use rayon::prelude::*;
|
||||
#[allow(unused_imports)]
|
||||
use std::{
|
||||
path::Path,
|
||||
io, fs::{self, OpenOptions,},
|
||||
convert::TryInto,
|
||||
};
|
||||
use smallvec::SmallVec;
|
||||
use cfg_if::cfg_if;
|
||||
|
||||
/// Print a short usage message to stderr and terminate the process with exit
/// status `-1`. Never returns.
fn usage() -> !
{
    // The first argument is only conventionally the program name and may be
    // missing entirely; fall back to a fixed name instead of panicking.
    let program = std::env::args()
        .next()
        .unwrap_or_else(|| String::from("fcmprs"));

    eprintln!("fcmprs: Compare files for identity");
    eprintln!("Usage: {} <files...>", program);

    std::process::exit(-1)
}
|
||||
|
||||
mod error;
|
||||
use error::ResultPrintExt as _;
|
||||
|
||||
mod map;
|
||||
use map::MappedFile as _;
|
||||
|
||||
use error::UnmatchError;
|
||||
|
||||
fn main() {
|
||||
let (map1, rest) = {
|
||||
let mut args = std::env::args().skip(1);
|
||||
if let Some(one) = args.next() {
|
||||
(one, args)
|
||||
} else {
|
||||
usage();
|
||||
}
|
||||
};
|
||||
|
||||
std::process::exit({
|
||||
if let Some(map1) = map::map(&map1).discard_msg(format!("Failed to map file {}", map1)) {
|
||||
let slice = map1.as_slice();
|
||||
#[cfg(feature="threads")] let map1_sz: u64 = slice.len().try_into().expect("File size could not fit into u64. This should never happen."); // For now, non-threaded mode doesn't use this.
|
||||
let mut ok = true;
|
||||
let chk: SmallVec<[_; 32]> = rest.filter_map(|filename| {
|
||||
let path = Path::new(&filename);
|
||||
if path.exists() && path.is_file() {
|
||||
map::map(path).discard_msg(format!("Failed to map file {}", filename))
|
||||
} else {
|
||||
eprintln!("File {} does not exist or is not a normal file", filename);
|
||||
ok=false;
|
||||
None
|
||||
}
|
||||
}).collect();
|
||||
|
||||
if !ok {
|
||||
-1
|
||||
} else {
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(feature="threads")] {
|
||||
match chk.into_par_iter()
|
||||
.map(|map| {
|
||||
if let Ok(stat) = map.as_file().metadata() {
|
||||
if stat.len() != map1_sz {
|
||||
return Err(UnmatchError::Size);
|
||||
}
|
||||
if !stat.is_file() {
|
||||
return Err(UnmatchError::Unknown);
|
||||
}
|
||||
}
|
||||
if slice == map.as_slice() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(UnmatchError::Data)
|
||||
}
|
||||
})
|
||||
.try_reduce_with(|_, _| Ok(()))
|
||||
{
|
||||
Some(Ok(_)) => 0,
|
||||
Some(Err(UnmatchError::Data)) => 1,
|
||||
Some(Err(UnmatchError::Size)) => 2,
|
||||
None => usage(),
|
||||
_ => -1,
|
||||
}
|
||||
} else {
|
||||
match chk.into_iter()
|
||||
.map(|map| {
|
||||
slice == map.as_slice()
|
||||
})
|
||||
.try_fold((false, true), |(_, a), b| if a && b {Ok((true, true))} else {Err(UnmatchError::Data)})
|
||||
{
|
||||
Ok((true, _)) => 0,
|
||||
Ok((false, _)) => usage(),
|
||||
Err(_) => 1,
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
-1
|
||||
}
|
||||
})
|
||||
}
|
@ -1,135 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
use once_cell::sync::OnceCell;
|
||||
|
||||
pub trait MappedFile
|
||||
{
|
||||
#[inline] fn as_slice(&self) -> &[u8]
|
||||
{
|
||||
&self.as_map()[..]
|
||||
}
|
||||
fn as_map(&self) -> &memmap::Mmap;
|
||||
fn as_file(&self) -> &fs::File;
|
||||
}
|
||||
|
||||
pub trait MappedFileNew: MappedFile + Sized
|
||||
{
|
||||
fn try_map(file: fs::File) -> io::Result<Self>;
|
||||
#[inline] fn map(file: fs::File) -> Self
|
||||
{
|
||||
Self::try_map(file).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents an open and memory mapped file
|
||||
#[derive(Debug)]
|
||||
pub struct MemMap
|
||||
{
|
||||
map: memmap::Mmap,
|
||||
file: fs::File,
|
||||
}
|
||||
|
||||
impl MappedFile for MemMap
|
||||
{
|
||||
/// Get the memory mapped portion as a slice
|
||||
fn as_slice(&self) -> &[u8] {
|
||||
&self.map[..]
|
||||
}
|
||||
fn as_map(&self) -> &memmap::Mmap {
|
||||
&self.map
|
||||
}
|
||||
#[inline] fn as_file(&self) -> &fs::File {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
impl MappedFileNew for MemMap
|
||||
{
|
||||
#[inline] fn try_map(file: fs::File) -> io::Result<Self>
|
||||
{
|
||||
Ok(MemMap {
|
||||
map: unsafe { memmap::Mmap::map(&file)? },
|
||||
file,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempt to map this file
|
||||
pub fn map_with<M: MappedFileNew>(file: &Path) -> io::Result<M>
|
||||
{
|
||||
let file = OpenOptions::new()
|
||||
.read(true)
|
||||
.open(file)?;
|
||||
|
||||
M::try_map(file)
|
||||
}
|
||||
|
||||
/// Type container for memory map
|
||||
pub type DefaultMapType = LazyMap;
|
||||
|
||||
/// Attempt to map this file to the `DefaultMapType`
|
||||
pub fn map(file: impl AsRef<Path>) -> io::Result<DefaultMapType>
|
||||
{
|
||||
map_with(file.as_ref())
|
||||
}
|
||||
|
||||
/// An open and maybe mapped file
|
||||
#[derive(Debug)]
|
||||
pub struct LazyMap
|
||||
{
|
||||
map: OnceCell<memmap::Mmap>,
|
||||
file: fs::File,
|
||||
}
|
||||
|
||||
impl LazyMap
|
||||
{
|
||||
#[inline(always)] fn get_map(&self) -> &memmap::Mmap
|
||||
{
|
||||
self.map.get_or_init(|| unsafe {memmap::Mmap::map(&self.file).expect("Lazy map failed")})
|
||||
}
|
||||
|
||||
#[inline(always)] fn try_get_map(&self) -> io::Result<&memmap::Mmap>
|
||||
{
|
||||
self.map.get_or_try_init(|| unsafe {memmap::Mmap::map(&self.file)})
|
||||
}
|
||||
|
||||
/// Is the memory mapped already?
|
||||
#[inline] pub fn is_mapped(&self) -> bool
|
||||
{
|
||||
self.map.get().is_some()
|
||||
}
|
||||
|
||||
/// Get the mapped portion if it is mapped, attempting a map if not
|
||||
#[inline] pub fn try_as_slice(&self) -> io::Result<&[u8]>
|
||||
{
|
||||
Ok(&self.try_get_map()?[..])
|
||||
}
|
||||
}
|
||||
|
||||
impl MappedFile for LazyMap
|
||||
{
|
||||
/// Get the memory mapped portion as a slice
|
||||
///
|
||||
/// Returns blank slice if mapping fails
|
||||
#[inline] fn as_slice(&self) -> &[u8]
|
||||
{
|
||||
self.try_get_map()
|
||||
.map(|x| &x[..])
|
||||
.unwrap_or(&[])
|
||||
}
|
||||
fn as_map(&self) -> &memmap::Mmap {
|
||||
self.map.get().unwrap()
|
||||
}
|
||||
#[inline] fn as_file(&self) -> &fs::File {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
impl MappedFileNew for LazyMap
|
||||
{
|
||||
#[inline] fn try_map(file: fs::File) -> io::Result<Self>
|
||||
{
|
||||
Ok(LazyMap {
|
||||
map: OnceCell::new(),
|
||||
file,
|
||||
})
|
||||
}
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
#!/bin/bash

# Generate sample files for profiling.
#
# Usage: gen <size in bytes> <output directory>
#
# Creates <output directory>/matching with ${ITERATIONS} copies of one random
# file, and <output directory>/unmatching with the same layout except that file
# number ${BREAK_AT} holds different random data.
# ITERATIONS may be set in the environment (default 100).

bound=$1

ITERATIONS=${ITERATIONS:-100}
BREAK_AT=50

# Both arguments are required; without them `cd` and `dd` would act on empty values.
if [[ -z "$bound" || -z "$2" ]]; then
    echo "Usage: $0 <size in bytes> <output directory>" >&2
    exit 1
fi

cd "$2" || exit 1

echo ">>> Generating ${ITERATIONS} matching files at $2/matching"
mkdir -p matching || exit 1
dd if=/dev/urandom of=./matching/0 bs="$bound" count=1 >> /dev/null 2>&1 || exit 1
pushd matching >>/dev/null || exit 1
for i in $(seq 1 "${ITERATIONS}"); do
    cp -f 0 "$i" || exit 1
done
popd >>/dev/null || exit 1

echo ">>> Generating ${ITERATIONS} with unmatching files at $2/unmatching"
mkdir -p unmatching || exit 1
dd if=/dev/urandom of=./unmatching/0 bs="$bound" count=1 >> /dev/null 2>&1 || exit 1
pushd unmatching >> /dev/null || exit 1
for i in $(seq 1 "${ITERATIONS}"); do
    if [[ "$i" -eq "${BREAK_AT}" ]]; then
        # This one file gets fresh random data so that it differs from file 0.
        echo " $i < unmatching"
        dd if=/dev/urandom of="$i" bs="$bound" count=1 >>/dev/null 2>&1 || exit 1
    else
        cp -f 0 "$i" || exit 1
    fi
done
popd >> /dev/null || exit 1
echo ">>> Generated in $2"
|
Loading…
Reference in new issue