@ -1,42 +1,83 @@
use super ::* ;
use std ::{
collections ::HashSet ,
collections ::{ HashMap , HashSet } ,
io ::{
self ,
Write ,
Read ,
} ,
path ::{
Path ,
PathBuf
} ,
} ;
/// Set of hashes seen so far, plus a per-path cache of hashes.
// NOTE(review): two `DupeMap` definitions follow (a tuple struct and a named-field
// struct); this looks like the old and new sides of a diff -- confirm which one stays.
#[ derive(Clone, PartialEq, Eq, Debug) ]
pub struct DupeMap ( HashSet < hash ::Sha256Hash > ) ;
pub struct DupeMap
{
iteration : HashSet < hash ::Sha256Hash > , // Hashes added through `try_add`; this is what `iter` walks
table : HashMap < PathBuf , hash ::Sha256Hash > , // Path -> hash entries added through `cache`; this is what `save` writes out
}
/// Borrow the raw bytes of `path` without copying.
///
/// Uses the Unix-only `std::os::unix::ffi::OsStrExt`; Windows is deliberately
/// not supported.
#[inline]
fn path_bytes(path: &Path) -> &[u8]
{
    use std::os::unix::ffi::OsStrExt;

    let raw = path.as_os_str();
    OsStrExt::as_bytes(raw)
}
/// View `bytes` as a `Path` without copying.
///
/// Uses the Unix-only `std::os::unix::ffi::OsStrExt`, so the bytes are taken
/// as-is and do not have to be valid UTF-8.
#[inline]
fn bytes_path(bytes: &[u8]) -> &Path
{
    use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

    Path::new(OsStr::from_bytes(bytes))
}
/// Marker bytes written by `save` in front of every entry; the reading side
/// compares the first bytes of each entry against this before parsing it.
const ENTRY_HEADER : & [ u8 ] = & [ 0x00 , 0xde , 0xad ] ;
impl DupeMap
{
/// Create a new empty dupe map: no known hashes and no cached paths.
// NOTE(review): the body holds two constructor expressions back to back (tuple form
// and named-field form); this looks like old and new diff lines -- confirm which stays.
pub fn new ( ) -> Self
{
Self ( HashSet ::new ( ) )
Self {iteration : HashSet ::new ( ) , table : HashMap ::new ( ) }
}
/// Iterator over all hashes added through `try_add`.
///
/// Entries stored with `cache` live in a separate table and are not visited.
// NOTE(review): the body holds both `self.0.iter()` and `self.iteration.iter()`;
// this looks like old and new diff lines -- confirm which stays.
pub fn iter ( & self ) -> std ::collections ::hash_set ::Iter < hash ::Sha256Hash >
{
self . 0. iter ( )
self . iteration . iter ( )
}
/// Remember `hash` as the hash of the file at path `id`.
///
/// An existing entry for the same path is never overwritten.
///
/// # Returns
///
/// `true` if the entry was stored, `false` if `id` was already cached.
pub fn cache<T: AsRef<Path>>(&mut self, id: T, hash: hash::Sha256Hash) -> bool
{
    let key = id.as_ref();
    // Guard: keep the first hash recorded for a path.
    if self.table.contains_key(key) {
        return false;
    }
    self.table.insert(key.to_owned(), hash);
    true
}
/// Is this hash in the set?
pub fn peek ( & self , hash : & hash ::Sha256Hash ) -> bool {
self . 0. contains ( hash )
/// Look up the cached hash for path `id`.
///
/// Returns `None` when no entry for that path is in the cache table.
pub fn get_cache<T: AsRef<Path>>(&self, id: T) -> Option<&hash::Sha256Hash>
{
    let key: &Path = id.as_ref();
    self.table.get(key)
}
/// Try to add `hash` to the set of known hashes.
///
/// Returns `true` if the hash was newly inserted, `false` if it was already
/// present (i.e. it is a duplicate).
// NOTE(review): the `self.0` lines and the `self.iteration` lines are both present;
// this looks like old and new diff lines -- confirm which pair stays.
pub fn try_add ( & mut self , hash : hash ::Sha256Hash ) -> bool
{
if self . 0. contains ( & hash ) {
if self . iteration .contains ( & hash ) {
false
} else {
self . 0. insert ( hash ) ;
self . iteration .insert ( hash ) ;
true
}
}
@ -45,9 +86,15 @@ impl DupeMap
/// Write the cached path -> hash entries to `to`.
///
/// Each entry is written as `ENTRY_HEADER`, then the path length, then the
/// path bytes, then the hash bytes. Returns the number of entries written,
/// or the first I/O error encountered.
// NOTE(review): every `to.write(..)?` below discards the returned byte count.
// `Write::write` may write only part of the buffer, so `write_all` is probably
// what is intended -- confirm.
// NOTE(review): the `for x in self.0` header and `to.write(x.as_ref())` look like
// leftover lines from the previous tuple-struct version -- confirm which loop stays.
pub fn save < W : Write > ( & self , to : & mut W ) -> io ::Result < usize >
{
let mut done = 0 ;
for x in self . 0 .iter ( )
for ( path , hash ) in self . table .iter ( )
{
to . write ( x . as_ref ( ) ) ? ;
let path = path_bytes ( path . as_ref ( ) ) ;
let hash : & [ u8 ] = hash . as_ref ( ) ;
to . write ( ENTRY_HEADER ) ? ;
// Path length as a `usize`; presumably its raw in-memory bytes, which would tie the
// format to the writer's pointer width and endianness -- verify `bytes::reinterpret`.
to . write ( bytes ::reinterpret ( & path . len ( ) ) ) ? ;
to . write ( path ) ? ;
to . write ( hash ) ? ;
done + = 1 ;
}
Ok ( done )
@ -60,9 +107,15 @@ impl DupeMap
use tokio ::prelude ::* ;
let mut done = 0 ;
for x in self . 0 .iter ( )
for ( path , hash ) in self . table .iter ( )
{
to . write ( x . as_ref ( ) ) . await ? ;
let path = path_bytes ( path . as_ref ( ) ) ;
let hash : & [ u8 ] = hash . as_ref ( ) ;
to . write ( ENTRY_HEADER ) . await ? ;
to . write ( bytes ::reinterpret ( & path . len ( ) ) ) . await ? ;
to . write ( path ) . await ? ;
to . write ( hash ) . await ? ;
done + = 1 ;
}
Ok ( done )
@ -73,15 +126,26 @@ impl DupeMap
{
let mut done = 0 ;
let mut read ;
let mut buffer = [ 0 u8 ; hash ::SHA256_SIZE ] ;
let mut header_buffer = [ 0 u8 ; ENTRY_HEADER . len ( ) + std ::mem ::size_of ::< usize > ( ) ] ;
let mut hash_buffer = [ 0 u8 ; hash ::SHA256_SIZE ] ;
while { read = from . read ( & mut buffer [ .. ] ) ? ; read = = hash ::SHA256_SIZE } {
done + = if self . try_add ( hash ::Sha256Hash ::new ( buffer ) ) {
1
} else {
0
} ;
while { read = from . read ( & mut header_buffer [ .. ] ) ? ; read = = header_buffer . len ( ) & & & header_buffer [ .. ENTRY_HEADER . len ( ) ] = = ENTRY_HEADER }
{
let sz = * bytes ::reinterpret_back ( & header_buffer [ ENTRY_HEADER . len ( ) .. ] ) ;
if sz > 0 {
let mut path = vec! [ 0 u8 ; sz ] ;
if from . read ( & mut path [ .. ] ) ? = = sz {
let path = bytes_path ( & path [ .. ] ) ;
if from . read ( & mut hash_buffer [ .. ] ) ? = = hash ::SHA256_SIZE
{
if self . cache ( path , hash ::Sha256Hash ::new ( hash_buffer ) ) {
done + = 1 ;
}
}
}
}
}
Ok ( done )
}
@ -94,15 +158,26 @@ impl DupeMap
let mut done = 0 ;
let mut read ;
let mut buffer = [ 0 u8 ; hash ::SHA256_SIZE ] ;
let mut header_buffer = [ 0 u8 ; ENTRY_HEADER . len ( ) + std ::mem ::size_of ::< usize > ( ) ] ;
let mut hash_buffer = [ 0 u8 ; hash ::SHA256_SIZE ] ;
while { read = from . read ( & mut buffer [ .. ] ) . await ? ; read = = hash ::SHA256_SIZE } {
done + = if self . try_add ( hash ::Sha256Hash ::new ( buffer ) ) {
1
} else {
0
} ;
while { read = from . read ( & mut header_buffer [ .. ] ) . await ? ; read = = header_buffer . len ( ) & & & header_buffer [ .. ENTRY_HEADER . len ( ) ] = = ENTRY_HEADER }
{
let sz = * bytes ::reinterpret_back ( & header_buffer [ ENTRY_HEADER . len ( ) .. ] ) ;
if sz > 0 {
let mut path = vec! [ 0 u8 ; sz ] ;
if from . read ( & mut path [ .. ] ) . await ? = = sz {
let path = bytes_path ( & path [ .. ] ) ;
if from . read ( & mut hash_buffer [ .. ] ) . await ? = = hash ::SHA256_SIZE
{
if self . cache ( path , hash ::Sha256Hash ::new ( hash_buffer ) ) {
done + = 1 ;
}
}
}
}
}
Ok ( done )
}
}