From 2f0bbf5adb5942ea4dcad839f905e9f520d2fe8f Mon Sep 17 00:00:00 2001
From: Darren Schroeder <343840+fdncred@users.noreply.github.com>
Date: Thu, 3 Feb 2022 11:35:06 -0600
Subject: [PATCH] `du` command (#916)

* wip on `du` command

* working
---
 Cargo.lock                                 |  10 +
 crates/nu-command/Cargo.toml               |  90 +++---
 crates/nu-command/src/default_context.rs   |   1 +
 crates/nu-command/src/platform/dir_info.rs | 320 +++++++++++++++++++++
 crates/nu-command/src/platform/du.rs       | 219 ++++++++++++++
 crates/nu-command/src/platform/mod.rs      |   4 +
 6 files changed, 599 insertions(+), 45 deletions(-)
 create mode 100644 crates/nu-command/src/platform/dir_info.rs
 create mode 100644 crates/nu-command/src/platform/du.rs

diff --git a/Cargo.lock b/Cargo.lock
index 40f7da8425..cebee7ce63 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -979,6 +979,15 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
 
+[[package]]
+name = "filesize"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12d741e2415d4e2e5bd1c1d00409d1a8865a57892c2d689b504365655d237d43"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "flatbuffers"
 version = "2.0.0"
@@ -2135,6 +2144,7 @@ dependencies = [
  "dtparse",
  "eml-parser",
  "encoding_rs",
+ "filesize",
  "glob",
  "hamcrest2",
  "htmlescape",
diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml
index 3bd5313aab..246f572a09 100644
--- a/crates/nu-command/Cargo.toml
+++ b/crates/nu-command/Cargo.toml
@@ -8,73 +8,73 @@ build = "build.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+nu-ansi-term = "0.42.0"
+nu-color-config = { path = "../nu-color-config" }
 nu-engine = { path = "../nu-engine" }
 nu-json = { path = "../nu-json" }
+nu-parser = { path = "../nu-parser" }
 nu-path = { path = "../nu-path" }
 nu-pretty-hex = { path = "../nu-pretty-hex" }
 nu-protocol = { path = "../nu-protocol" }
+nu-system = { path = "../nu-system" }
 nu-table = { path = "../nu-table" }
 nu-term-grid = { path = "../nu-term-grid" }
 nu-test-support = { path = "../nu-test-support" }
-nu-parser = { path = "../nu-parser" }
-nu-system = { path = "../nu-system" }
-# nu-ansi-term = { path = "../nu-ansi-term" }
-nu-ansi-term = "0.42.0"
-nu-color-config = { path = "../nu-color-config" }
 
 # Potential dependencies for extras
-url = "2.2.1"
-csv = "1.1.3"
-glob = "0.3.0"
-pathdiff = "0.2.1"
-Inflector = "0.11"
-thiserror = "1.0.29"
-sysinfo = "0.22.2"
+base64 = "0.13.0"
+bytesize = "1.1.0"
+calamine = "0.18.0"
 chrono = { version = "0.4.19", features = ["serde"] }
 chrono-humanize = "0.2.1"
 chrono-tz = "0.6.0"
-dtparse = "1.2.0"
-terminal_size = "0.1.17"
-indexmap = { version="1.7", features=["serde-1"] }
-lscolors = { version = "0.8.0", features = ["crossterm"] }
-bytesize = "1.1.0"
+crossterm = "0.22.1"
+csv = "1.1.3"
 dialoguer = "0.9.0"
+digest = "0.10.0"
+dtparse = "1.2.0"
+eml-parser = "0.1.0"
+encoding_rs = "0.8.30"
+filesize = "0.2.0"
+glob = "0.3.0"
+htmlescape = "0.3.1"
+ical = "0.7.0"
+indexmap = { version="1.7", features=["serde-1"] }
+Inflector = "0.11"
+itertools = "0.10.0"
+lazy_static = "1.4.0"
+log = "0.4.14"
+lscolors = { version = "0.8.0", features = ["crossterm"] }
+md5 = { package = "md-5", version = "0.10.0" }
+meval = "0.2.0"
+mime = "0.3.16"
+num = { version = "0.4.0", optional = true }
+pathdiff = "0.2.1"
+quick-xml = "0.22"
+rand = "0.8"
 rayon = "1.5.1"
 regex = "1.5.4"
-titlecase = "1.1.0"
-meval = "0.2.0"
-serde = { version="1.0.123", features=["derive"] }
-serde_yaml = "0.8.16"
-serde_urlencoded = "0.7.0"
-serde_ini = "0.2.0"
-eml-parser = "0.1.0"
-toml = "0.5.8"
-itertools = "0.10.0"
-ical = "0.7.0"
-calamine = "0.18.0"
+reqwest = {version = "0.11", features = ["blocking"] }
 roxmltree = "0.14.0"
-rand = "0.8"
 rust-embed = "6.3.0"
+serde = { version="1.0.123", features=["derive"] }
+serde_ini = "0.2.0"
+serde_urlencoded = "0.7.0"
+serde_yaml = "0.8.16"
+sha2 = "0.10.0"
+shadow-rs = "0.8.1"
+strip-ansi-escapes = "0.1.1"
+sysinfo = "0.22.2"
+terminal_size = "0.1.17"
+thiserror = "1.0.29"
+titlecase = "1.1.0"
+toml = "0.5.8"
 trash = { version = "2.0.2", optional = true }
 unicode-segmentation = "1.8.0"
+url = "2.2.1"
 uuid = { version = "0.8.2", features = ["v4"] }
-htmlescape = "0.3.1"
-zip = { version="0.5.9", optional=true }
-lazy_static = "1.4.0"
-strip-ansi-escapes = "0.1.1"
-crossterm = "0.22.1"
-shadow-rs = "0.8.1"
-quick-xml = "0.22"
-digest = "0.10.0"
-md5 = { package = "md-5", version = "0.10.0" }
-sha2 = "0.10.0"
-base64 = "0.13.0"
-encoding_rs = "0.8.30"
-num = { version = "0.4.0", optional = true }
-reqwest = {version = "0.11", features = ["blocking"] }
-mime = "0.3.16"
-log = "0.4.14"
 which = { version = "4.2.2", optional = true }
+zip = { version="0.5.9", optional=true }
 
 [target.'cfg(unix)'.dependencies]
 umask = "1.0.0"
diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index 184fe2d94c..1a227270a7 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -31,6 +31,7 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
             DefEnv,
             Describe,
             Do,
+            Du,
             Echo,
             ExportCommand,
             ExportDef,
diff --git a/crates/nu-command/src/platform/dir_info.rs b/crates/nu-command/src/platform/dir_info.rs
new file mode 100644
index 0000000000..a18081510e
--- /dev/null
+++ b/crates/nu-command/src/platform/dir_info.rs
@@ -0,0 +1,320 @@
+use filesize::file_real_size_fast;
+use glob::Pattern;
+use nu_protocol::{ShellError, Span, Value};
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+
+/// Options shared by every directory visited during one `du` traversal.
+#[derive(Debug, Clone)]
+pub struct DirBuilder {
+    /// Span attached to every value produced by the traversal.
+    pub tag: Span,
+    /// Files smaller than this apparent size (in bytes) are not counted.
+    pub min: Option<u64>,
+    /// Follow symlinks when measuring files instead of measuring the link itself.
+    pub deref: bool,
+    /// Files whose path matches this pattern are not counted.
+    pub exclude: Option<Pattern>,
+    /// Keep a per-file entry in the result, not just directory totals.
+    pub all: bool,
+}
+
+impl DirBuilder {
+    /// Bundles the traversal options into a `DirBuilder`.
+    pub fn new(
+        tag: Span,
+        min: Option<u64>,
+        deref: bool,
+        exclude: Option<Pattern>,
+        all: bool,
+    ) -> DirBuilder {
+        DirBuilder {
+            tag,
+            min,
+            deref,
+            exclude,
+            all,
+        }
+    }
+}
+
+/// Accumulated size information for a directory and everything below it.
+#[derive(Debug, Clone)]
+pub struct DirInfo {
+    dirs: Vec<DirInfo>,
+    files: Vec<FileInfo>,
+    errors: Vec<ShellError>,
+    size: u64,
+    blocks: u64,
+    path: PathBuf,
+    tag: Span,
+}
+
+/// Size information for a single non-directory entry.
+#[derive(Debug, Clone)]
+pub struct FileInfo {
+    path: PathBuf,
+    size: u64,
+    blocks: Option<u64>,
+    tag: Span,
+}
+
+impl FileInfo {
+    /// Reads the metadata of `path`, following a symlink only when `deref` is set.
+    ///
+    /// Returns the I/O error converted to a `ShellError` when the metadata cannot be read.
+    pub fn new(path: impl Into<PathBuf>, deref: bool, tag: Span) -> Result<Self, ShellError> {
+        let path = path.into();
+        let m = if deref {
+            std::fs::metadata(&path)
+        } else {
+            std::fs::symlink_metadata(&path)
+        };
+
+        match m {
+            Ok(d) => {
+                let block_size = file_real_size_fast(&path, &d).ok();
+
+                Ok(FileInfo {
+                    path,
+                    blocks: block_size,
+                    size: d.len(),
+                    tag,
+                })
+            }
+            Err(e) => Err(e.into()),
+        }
+    }
+}
+
+impl DirInfo {
+    /// Walks `path` and sums the sizes of the entries found below it.
+    ///
+    /// `depth` is the number of directory levels still allowed below this one
+    /// (`None` means unlimited). Errors met along the way are recorded on the
+    /// returned value instead of aborting the walk, and the walk stops early
+    /// once `ctrl_c` is set.
+    pub fn new(
+        path: impl Into<PathBuf>,
+        params: &DirBuilder,
+        depth: Option<u64>,
+        ctrl_c: Option<Arc<AtomicBool>>,
+    ) -> Self {
+        let path = path.into();
+
+        let mut s = Self {
+            dirs: Vec::new(),
+            errors: Vec::new(),
+            files: Vec::new(),
+            size: 0,
+            blocks: 0,
+            tag: params.tag,
+            path,
+        };
+
+        match std::fs::metadata(&s.path) {
+            Ok(d) => {
+                s.size = d.len(); // dir entry size
+                s.blocks = file_real_size_fast(&s.path, &d).ok().unwrap_or(0);
+            }
+            Err(e) => s = s.add_error(e.into()),
+        };
+
+        match std::fs::read_dir(&s.path) {
+            Ok(d) => {
+                for f in d {
+                    // Only an installed, triggered ctrl-c flag stops the walk; without a
+                    // flag every entry must still be visited.
+                    if let Some(ref cc) = ctrl_c {
+                        if cc.load(Ordering::SeqCst) {
+                            break;
+                        }
+                    }
+
+                    match f {
+                        Ok(i) => match i.file_type() {
+                            Ok(t) if t.is_dir() => {
+                                s = s.add_dir(i.path(), depth, params, ctrl_c.clone())
+                            }
+                            Ok(_t) => s = s.add_file(i.path(), params),
+                            Err(e) => s = s.add_error(e.into()),
+                        },
+                        Err(e) => s = s.add_error(e.into()),
+                    }
+                }
+            }
+            Err(e) => s = s.add_error(e.into()),
+        }
+        s
+    }
+
+    /// Recurses into the sub-directory `path` and folds its totals into `self`,
+    /// unless the remaining `depth` budget is already exhausted.
+    fn add_dir(
+        mut self,
+        path: impl Into<PathBuf>,
+        mut depth: Option<u64>,
+        params: &DirBuilder,
+        ctrl_c: Option<Arc<AtomicBool>>,
+    ) -> Self {
+        if let Some(current) = depth {
+            if let Some(new) = current.checked_sub(1) {
+                depth = Some(new);
+            } else {
+                return self;
+            }
+        }
+
+        let d = DirInfo::new(path, params, depth, ctrl_c);
+        self.size += d.size;
+        self.blocks += d.blocks;
+        self.dirs.push(d);
+        self
+    }
+
+    /// Adds the file `f` to the totals when it passes the exclude and minimum-size
+    /// filters; the entry itself is kept only when `params.all` is set.
+    fn add_file(mut self, f: impl Into<PathBuf>, params: &DirBuilder) -> Self {
+        let f = f.into();
+        let include = params
+            .exclude
+            .as_ref()
+            .map_or(true, |x| !x.matches_path(&f));
+        if include {
+            match FileInfo::new(f, params.deref, self.tag) {
+                Ok(file) => {
+                    let inc = params.min.map_or(true, |s| file.size >= s);
+                    if inc {
+                        self.size += file.size;
+                        self.blocks += file.blocks.unwrap_or(0);
+                        if params.all {
+                            self.files.push(file);
+                        }
+                    }
+                }
+                Err(e) => self = self.add_error(e),
+            }
+        }
+        self
+    }
+
+    /// Records an error met during the walk.
+    fn add_error(mut self, e: ShellError) -> Self {
+        self.errors.push(e);
+        self
+    }
+
+    /// Total apparent size in bytes accumulated for this directory.
+    pub fn get_size(&self) -> u64 {
+        self.size
+    }
+}
+
+impl From<DirInfo> for Value {
+    /// Converts the directory summary into a record.
+    fn from(d: DirInfo) -> Self {
+        let mut cols = vec![];
+        let mut vals = vec![];
+
+        cols.push("path".into());
+        vals.push(Value::string(d.path.display().to_string(), d.tag));
+
+        cols.push("apparent".into());
+        vals.push(Value::Filesize {
+            val: d.size as i64,
+            span: d.tag,
+        });
+
+        cols.push("physical".into());
+        vals.push(Value::Filesize {
+            val: d.blocks as i64,
+            span: d.tag,
+        });
+
+        cols.push("directories".into());
+        vals.push(value_from_vec(d.dirs, &d.tag));
+
+        cols.push("files".into());
+        vals.push(value_from_vec(d.files, &d.tag));
+
+        // if !d.errors.is_empty() {
+        //     let v = d
+        //         .errors
+        //         .into_iter()
+        //         .map(move |e| Value::Error { error: e })
+        //         .collect::<Vec<Value>>();
+
+        //     cols.push("errors".into());
+        //     vals.push(Value::List {
+        //         vals: v,
+        //         span: d.tag,
+        //     })
+        // }
+
+        Value::Record {
+            cols,
+            vals,
+            span: d.tag,
+        }
+    }
+}
+
+impl From<FileInfo> for Value {
+    /// Converts the file summary into a record with the same columns as a directory.
+    fn from(f: FileInfo) -> Self {
+        let mut cols = vec![];
+        let mut vals = vec![];
+
+        cols.push("path".into());
+        vals.push(Value::string(f.path.display().to_string(), f.tag));
+
+        cols.push("apparent".into());
+        vals.push(Value::Filesize {
+            val: f.size as i64,
+            span: f.tag,
+        });
+
+        cols.push("physical".into());
+        vals.push(Value::Filesize {
+            val: match f.blocks {
+                Some(b) => b as i64,
+                None => 0i64,
+            },
+            span: f.tag,
+        });
+
+        // A file has no children; the placeholders carry the command span like
+        // every other column of the record.
+        cols.push("directories".into());
+        vals.push(Value::nothing(f.tag));
+
+        cols.push("files".into());
+        vals.push(Value::nothing(f.tag));
+
+        // cols.push("errors".into());
+        // vals.push(Value::nothing(f.tag));
+
+        Value::Record {
+            cols,
+            vals,
+            span: f.tag,
+        }
+    }
+}
+
+/// Turns `vec` into a list value, or into nothing when it is empty.
+fn value_from_vec<V>(vec: Vec<V>, tag: &Span) -> Value
+where
+    V: Into<Value>,
+{
+    if vec.is_empty() {
+        Value::nothing(*tag)
+    } else {
+        let values = vec.into_iter().map(Into::into).collect::<Vec<Value>>();
+        Value::List {
+            vals: values,
+            span: *tag,
+        }
+    }
+}
diff --git a/crates/nu-command/src/platform/du.rs b/crates/nu-command/src/platform/du.rs
new file mode 100644
index 0000000000..d47dd7e727
--- /dev/null
+++ b/crates/nu-command/src/platform/du.rs
@@ -0,0 +1,219 @@
+use crate::{DirBuilder, DirInfo, FileInfo};
+use glob::{GlobError, MatchOptions, Pattern};
+use nu_engine::CallExt;
+use nu_protocol::{
+    ast::Call,
+    engine::{Command, EngineState, Stack},
+    Category, Example, IntoInterruptiblePipelineData, PipelineData, ShellError, Signature, Span,
+    Spanned, SyntaxShape, Value,
+};
+use serde::Deserialize;
+use std::path::PathBuf;
+
+/// Glob matching rules for the `path` argument: wildcards never cross a path
+/// separator, but they do match names that start with a dot.
+const GLOB_PARAMS: MatchOptions = MatchOptions {
+    case_sensitive: true,
+    require_literal_separator: true,
+    require_literal_leading_dot: false,
+};
+
+/// The `du` command: reports apparent and physical sizes of directories and files.
+#[derive(Clone)]
+pub struct Du;
+
+/// Arguments of one `du` invocation after evaluation and validation.
+#[derive(Deserialize, Clone, Debug)]
+pub struct DuArgs {
+    path: Option<Spanned<PathBuf>>,
+    all: bool,
+    deref: bool,
+    exclude: Option<Spanned<String>>,
+    #[serde(rename = "max-depth")]
+    max_depth: Option<u64>,
+    #[serde(rename = "min-size")]
+    min_size: Option<u64>,
+}
+
+impl Command for Du {
+    fn name(&self) -> &str {
+        "du"
+    }
+
+    fn usage(&self) -> &str {
+        "Find disk usage sizes of specified items."
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("du")
+            .optional("path", SyntaxShape::GlobPattern, "starting directory")
+            .switch(
+                "all",
+                "Output file sizes as well as directory sizes",
+                Some('a'),
+            )
+            .switch(
+                "deref",
+                "Dereference symlinks to their targets for size",
+                Some('r'),
+            )
+            .named(
+                "exclude",
+                SyntaxShape::GlobPattern,
+                "Exclude these file names",
+                Some('x'),
+            )
+            .named(
+                "max-depth",
+                SyntaxShape::Int,
+                "Directory recursion limit",
+                Some('d'),
+            )
+            .named(
+                "min-size",
+                SyntaxShape::Int,
+                "Exclude files below this size",
+                Some('m'),
+            )
+            .category(Category::Core)
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        _input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let tag = call.head;
+        let args = DuArgs {
+            path: call.opt(engine_state, stack, 0)?,
+            all: call.has_flag("all"),
+            deref: call.has_flag("deref"),
+            exclude: call.get_flag(engine_state, stack, "exclude")?,
+            max_depth: non_negative(
+                call.get_flag(engine_state, stack, "max-depth")?,
+                "max-depth",
+                tag,
+            )?,
+            // Look the flag up under the name declared in the signature (`min-size`);
+            // any other spelling never matches and silently disables the filter.
+            min_size: non_negative(
+                call.get_flag(engine_state, stack, "min-size")?,
+                "min-size",
+                tag,
+            )?,
+        };
+
+        let exclude = args.exclude.map_or(Ok(None), move |x| {
+            Pattern::new(&x.item).map(Some).map_err(|e| {
+                ShellError::SpannedLabeledError(e.msg.to_string(), "glob error".to_string(), x.span)
+            })
+        })?;
+
+        // A path that is not valid UTF-8 cannot be used as a glob pattern; report it
+        // as an error instead of panicking.
+        let pattern = match &args.path {
+            Some(p) => p.item.to_str().ok_or_else(|| {
+                ShellError::SpannedLabeledError(
+                    "Invalid path".to_string(),
+                    "path is not valid UTF-8".to_string(),
+                    p.span,
+                )
+            })?,
+            None => "*",
+        };
+
+        let include_files = args.all;
+        let paths = glob::glob_with(pattern, GLOB_PARAMS)
+            .map_err(|e| {
+                ShellError::SpannedLabeledError(e.msg.to_string(), "glob error".to_string(), tag)
+            })?
+            .filter(move |p| {
+                if include_files {
+                    true
+                } else {
+                    match p {
+                        Ok(f) if f.is_dir() => true,
+                        Err(e) if e.path().is_dir() => true,
+                        _ => false,
+                    }
+                }
+            })
+            .map(|v| v.map_err(glob_err_into));
+
+        let all = args.all;
+        let deref = args.deref;
+        let max_depth = args.max_depth;
+        let min_size = args.min_size;
+
+        let params = DirBuilder {
+            tag,
+            min: min_size,
+            deref,
+            exclude,
+            all,
+        };
+
+        let mut output: Vec<Value> = vec![];
+        for p in paths {
+            match p {
+                Ok(a) => {
+                    if a.is_dir() {
+                        output.push(
+                            DirInfo::new(a, &params, max_depth, engine_state.ctrlc.clone()).into(),
+                        );
+                    } else if let Ok(v) = FileInfo::new(a, deref, tag) {
+                        output.push(v.into());
+                    }
+                }
+                Err(e) => {
+                    output.push(Value::Error { error: e });
+                }
+            }
+        }
+
+        Ok(output.into_pipeline_data(engine_state.ctrlc.clone()))
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Disk usage of the current directory",
+            example: "du",
+            result: None,
+        }]
+    }
+}
+
+/// Converts a glob iteration error into the `ShellError` for its underlying I/O error.
+fn glob_err_into(e: GlobError) -> ShellError {
+    let e = e.into_error();
+    ShellError::from(e)
+}
+
+/// Converts the value of an integer flag to `u64`, turning a negative number into
+/// an error labelled at `span` instead of letting it wrap around or panic.
+fn non_negative(value: Option<i64>, flag: &str, span: Span) -> Result<Option<u64>, ShellError> {
+    value
+        .map(|n| {
+            u64::try_from(n).map_err(|_| {
+                ShellError::SpannedLabeledError(
+                    format!("Invalid value for --{}", flag),
+                    "expected a non-negative integer".to_string(),
+                    span,
+                )
+            })
+        })
+        .transpose()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Du;
+
+    #[test]
+    fn examples_work_as_expected() {
+        use crate::test_examples;
+        test_examples(Du {})
+    }
+}
diff --git a/crates/nu-command/src/platform/mod.rs b/crates/nu-command/src/platform/mod.rs
index af8954c538..03654af5c9 100644
--- a/crates/nu-command/src/platform/mod.rs
+++ b/crates/nu-command/src/platform/mod.rs
@@ -1,5 +1,7 @@
 mod ansi;
 mod clear;
+mod dir_info;
+mod du;
 mod input;
 mod input_keys;
 mod kill;
@@ -8,6 +10,8 @@ mod term_size;
 
 pub use ansi::{Ansi, AnsiGradient, AnsiStrip};
 pub use clear::Clear;
+pub use dir_info::{DirBuilder, DirInfo, FileInfo};
+pub use du::Du;
 pub use input::Input;
 pub use input_keys::InputKeys;
 pub use kill::Kill;