From 55f611b1affa53398edb2288c770b7e80f8ff588 Mon Sep 17 00:00:00 2001 From: Avril Date: Wed, 10 Feb 2021 18:19:03 +0000 Subject: [PATCH] graph needs redesign... --- src/data/graph.rs | 194 ++++++++++++++++++++++++++++++++++++++++++++-- src/main.rs | 5 +- 2 files changed, 192 insertions(+), 7 deletions(-) diff --git a/src/data/graph.rs b/src/data/graph.rs index 5ef25e9..2aead7c 100644 --- a/src/data/graph.rs +++ b/src/data/graph.rs @@ -1,6 +1,79 @@ use super::*; use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; +use std::borrow::Borrow; +use std::iter::FusedIterator; +use std::cell::RefCell; + +#[derive(Debug, Clone, PartialEq, Eq)] +/// A reference to an INode within a graph. +pub struct INodeRef<'a>(&'a INodeInfoGraph, INode); + +impl<'a> AsRef for INodeRef<'a> +{ + fn as_ref(&self) -> &Path + { + self.0.paths_reverse.get(&self.1).unwrap() + } +} + +impl<'a> Borrow for INodeRef<'a> +{ + fn borrow(&self) -> &INode + { + &self.1 + } +} + + +impl<'a> INodeRef<'a> +{ + /// The owning graph of this INode reference. + #[inline] pub fn graph(&self) -> &INodeInfoGraph + { + self.0 + } + + /// The `FsInfo` for this INode. Panics if this INode is not a key of the graph's `inodes` table. + #[inline] pub fn info(&self) -> &FsInfo + { + self.0.inodes.get(&self.1).unwrap() + } + + /// Compute the total size of this INode. + /// + /// If this is a file, it is O(1). Otherwise, recursively compute the sizes of all children. The result is looked up in, and stored into, the graph's size cache via `lookup_size_or`. + #[inline] pub fn size(&self) -> u64 + { + self.0.lookup_size_or(&self.1, move || { + match self.info() + { + FsInfo::File(sz, _) => *sz, + FsInfo::Directory(_) => { + self.children().map(|x| x.size()).sum() + }, + } + }) + } + + /// The internal INode + #[inline] pub fn inode(&self) -> INode + { + self.1 + } + + /// The path this INode refers to. Panics if the graph has no reverse path entry for this INode. + #[inline] pub fn path(&self) -> &Path + { + self.as_ref() + } + + /// Get an iterator over the children of this INode; the iterator is empty if it has none. 
+ #[inline] pub fn children(&self) -> Children<'a> + { + self.0.children_of(&self.1) + } +} /// Contains a graph of all paths and inodes that were successfully stat'd #[derive(Debug, Clone, PartialEq, Eq)] @@ -8,19 +81,130 @@ pub struct INodeInfoGraph { inodes: HashMap, // FsInfo contains parent INode that can be used to look up again in this table paths: HashMap, // map absolute paths to INodes to be looked up in `inodes` table. + + paths_reverse: HashMap, // reverse lookup of INode to PathBuf (filled from `paths` by `compute_reverse_path_table`) + + children: HashMap>, // maps a parent INode to the INodes whose FsInfo names it as their parent (filled by `compute_child_table`) + + total_sizes_cached: RefCell>, // cache of total sizes already computed for INodes; filled lazily by `lookup_insert_size` } impl INodeInfoGraph { + fn lookup_insert_size(&self, node: INode, with: F) -> u64 + where F: FnOnce() -> u64 + { + let w = with(); + + if let Ok(mut cache) = self.total_sizes_cached.try_borrow_mut() + { + cache.insert(node, w); + w + } else { + w + } + } + fn lookup_size_or(&self, node: impl Borrow, or: F) -> u64 + where F: FnOnce() -> u64 + { + if let Ok(cache) = self.total_sizes_cached.try_borrow() { + let node = node.borrow(); + if let Some(sz) = cache.get(node) { + return *sz; + } + } + self.lookup_insert_size(*node.borrow(), or) + } /// Create a new graph from these linked `HashMap`s #[inline] pub fn new(inodes: HashMap, paths: HashMap) -> Self { Self { + children: HashMap::with_capacity(inodes.len()), + paths_reverse: HashMap::with_capacity(paths.len()), + total_sizes_cached: RefCell::new(HashMap::with_capacity(inodes.len())), inodes, - paths + paths, + } + .compute_child_table() + .compute_reverse_path_table() + } + #[inline] fn compute_child_table(mut self) -> Self + { + for (node, info) in self.inodes.iter() + { + match info { + FsInfo::Directory(parent_node) | + FsInfo::File(_, parent_node) => { + self.children.entry(*parent_node).or_insert_with(|| Vec::new()).push(*node); + }, + } } + + self + } + #[inline] fn compute_reverse_path_table(mut self) -> Self + { + 
+ self.paths_reverse.extend(self.paths.iter().map(|(x,y)| (*y,x.clone()))); + self + } + /// Get the FsInfo of this `INode`, or `None` if it is not a key of the `inodes` table + #[inline] pub fn get_info(&self, node: impl Borrow) -> Option<&FsInfo> + { + self.inodes.get(node.borrow()) + } + /// An iterator over the immediate children recorded for this node (empty if none are recorded) + pub fn children_of(&self, node: impl Borrow) -> Children<'_> + { + Children(self, match self.children.get(node.borrow()) { + Some(slc) => slc.iter(), + _ => [].iter(), + }) + } + /// An iterator over all the directories in this graph, i.e. every key of the `children` table. NOTE(review): a parent INode that was never stat'd presumably has no `inodes` entry, so `info()`/`size()` on it would panic; confirm. + pub fn directories(&self) -> Directories<'_> + { + Directories(self, self.children.keys()) + } +} + +/// An iterator over all directories in a graph +#[derive(Debug, Clone)] +pub struct Directories<'a>(&'a INodeInfoGraph, std::collections::hash_map::Keys<'a, INode, Vec>); + +impl<'a> Iterator for Directories<'a> +{ + type Item = INodeRef<'a>; + #[inline] fn next(&mut self) -> Option + { + self.1.next().map(|x| INodeRef(self.0, *x)) + } + #[inline] fn size_hint(&self) -> (usize, Option) { + self.1.size_hint() + } +} +impl<'a> ExactSizeIterator for Directories<'a>{} +impl<'a> FusedIterator for Directories<'a>{} + +/// The immediate children of a specific INode +#[derive(Debug, Clone)] +pub struct Children<'a>(&'a INodeInfoGraph, std::slice::Iter<'a, INode>); + +impl<'a> Iterator for Children<'a> +{ + type Item = INodeRef<'a>; + #[inline] fn next(&mut self) -> Option + { + self.1.next().map(|x| INodeRef(self.0, *x)) + } + fn size_hint(&self) -> (usize, Option) { + self.1.size_hint() + } +} +impl<'a> ExactSizeIterator for Children<'a>{} +impl<'a> FusedIterator for Children<'a>{} +impl<'a> DoubleEndedIterator for Children<'a> +{ + #[inline] fn next_back(&mut self) -> Option { + self.1.next_back().map(|x| INodeRef(self.0, *x)) } - //TODO: Get whole directory structure. Find largest, etc. 
- //TODO: Order by largest file size, get, iter, etc - //TODO: Group children to parent (child FSInfos all have references to their parent INode, but parents don't have references to their children. Top level FsInfos will also have parent INodes that don't appear in the map as keys.) } diff --git a/src/main.rs b/src/main.rs index 392514d..0f08217 100644 --- a/src/main.rs +++ b/src/main.rs @@ -48,8 +48,9 @@ async fn main() -> eyre::Result<()> { .with_suggestion(|| "Try running `--help`")?); let graph = work::work_on_all(state).await; - - println!("{:?}", graph); + let max_size = graph.directories().map(|x| x.size()).max(); + println!("Max size: {:?}", max_size); + //println!("{:?}", graph); Ok(()) }