diff --git a/crates/nu-command/src/example_test.rs b/crates/nu-command/src/example_test.rs index 7fb5f1e5b2..f4cf8c6910 100644 --- a/crates/nu-command/src/example_test.rs +++ b/crates/nu-command/src/example_test.rs @@ -10,8 +10,8 @@ pub fn test_examples(cmd: impl Command + 'static) { mod test_examples { use super::super::{ Ansi, Date, Enumerate, Flatten, From, Get, Into, IntoString, LetEnv, Math, MathEuler, - MathPi, MathRound, ParEach, Path, Random, Sort, SortBy, Split, SplitColumn, SplitRow, Str, - StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap, + MathPi, MathRound, ParEach, Path, PathParse, Random, Sort, SortBy, Split, SplitColumn, + SplitRow, Str, StrJoin, StrLength, StrReplace, Update, Url, Values, Wrap, }; use crate::{Each, To}; use nu_cmd_lang::example_support::{ @@ -85,6 +85,7 @@ mod test_examples { working_set.add_decl(Box::new(MathRound)); working_set.add_decl(Box::new(Mut)); working_set.add_decl(Box::new(Path)); + working_set.add_decl(Box::new(PathParse)); working_set.add_decl(Box::new(ParEach)); working_set.add_decl(Box::new(Random)); working_set.add_decl(Box::new(Sort)); diff --git a/crates/nu-command/src/filters/group_by.rs b/crates/nu-command/src/filters/group_by.rs index fd6e851673..7e0043b734 100644 --- a/crates/nu-command/src/filters/group_by.rs +++ b/crates/nu-command/src/filters/group_by.rs @@ -1,9 +1,8 @@ use nu_engine::{eval_block, CallExt}; -use nu_protocol::ast::Call; +use nu_protocol::ast::{Call, CellPath}; use nu_protocol::engine::{Closure, Command, EngineState, Stack}; use nu_protocol::{ - Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, - Type, Value, + Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; use indexmap::IndexMap; @@ -26,7 +25,16 @@ impl Command for GroupBy { Type::List(Box::new(Type::Any)), Type::Record(vec![]), )]) - .optional("grouper", SyntaxShape::Any, "the grouper value to use") + .optional( + "grouper", + SyntaxShape::OneOf(vec![ + SyntaxShape::CellPath, + SyntaxShape::Block, + SyntaxShape::Closure(None), + SyntaxShape::Closure(Some(vec![SyntaxShape::Any])), + ]), + "the path to the column to group on", + ) } fn usage(&self) -> &str { @@ -50,6 +58,33 @@ impl Command for GroupBy { example: r#"ls | group-by type"#, result: None, }, + Example { + description: "Group items by the \"foo\" column's values, ignoring records without a \"foo\" column", + example: r#"open cool.json | group-by foo?"#, + result: None, + }, + Example { + description: "Group using a block which is evaluated against each input value", + example: "[foo.txt bar.csv baz.txt] | group-by { path parse | get extension }", + result: Some(Value::Record { + cols: vec!["txt".to_string(), "csv".to_string()], + vals: vec![ + Value::List { + vals: vec![ + Value::test_string("foo.txt"), + Value::test_string("baz.txt"), + ], + span: Span::test_data(), + }, + Value::List { + vals: vec![Value::test_string("bar.csv")], + span: Span::test_data(), + }, + ], + span: Span::test_data(), + }), + }, + Example { description: "You can also group by raw values by leaving out the argument", example: "['1' '3' '1' '3' '2' '1' '1'] | group-by", @@ -81,131 +116,163 @@ impl Command for GroupBy { } } -enum Grouper { - ByColumn(Option>), - ByBlock, -} - pub fn group_by( engine_state: &EngineState, stack: &mut Stack, call: &Call, input: PipelineData, ) -> Result { - let name = call.head; + let span = call.head; let grouper: Option = call.opt(engine_state, stack, 0)?; let values: Vec = input.into_iter().collect(); - let mut keys: Vec> = vec![]; - let mut group_strategy = Grouper::ByColumn(None); if values.is_empty() { return Err(ShellError::GenericError( "expected table from pipeline".into(), "requires a table input".into(), - Some(name), + Some(span), None, Vec::new(), )); } - let first = values[0].clone(); - - let value_list = Value::List { - vals: values.clone(), - span: name, - }; - - match grouper { + let group_value = match grouper { + Some(Value::CellPath { val, span }) => group_cell_path(val, values, span)?, Some(Value::Block { .. }) | Some(Value::Closure { .. }) => { let block: Option = call.opt(engine_state, stack, 0)?; - let error_key = "error"; - - for value in values { - if let Some(capture_block) = &block { - let mut stack = stack.captures_to_stack(&capture_block.captures); - let block = engine_state.get_block(capture_block.block_id); - let pipeline = eval_block( - engine_state, - &mut stack, - block, - value.into_pipeline_data(), - call.redirect_stdout, - call.redirect_stderr, - ); - - match pipeline { - Ok(s) => { - let collection: Vec = s.into_iter().collect(); - - if collection.len() > 1 { - return Err(ShellError::GenericError( - "expected one value from the block".into(), - "requires a table with one value for grouping".into(), - Some(name), - None, - Vec::new(), - )); - } - - let value = match collection.get(0) { - Some(Value::Error { .. }) | None => Value::string(error_key, name), - Some(return_value) => return_value.clone(), - }; - - keys.push(value.as_string()); - } - Err(_) => { - keys.push(Ok(error_key.into())); - } - } - } - } - - group_strategy = Grouper::ByBlock; + group_closure(&values, span, block, stack, engine_state, call)? } - Some(other) => { - group_strategy = Grouper::ByColumn(Some(Spanned { - item: other.as_string()?, - span: name, - })); + None => group_no_grouper(values, span)?, + _ => { + return Err(ShellError::TypeMismatch { + err_message: "unsupported grouper type".to_string(), + span, + }) } - _ => {} - } - - let name = if let Ok(span) = first.span() { - span - } else { - name }; - let group_value = match group_strategy { - Grouper::ByBlock => { - let map = keys; - - let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) { - Some(Ok(key)) => Ok(key.clone()), - Some(Err(reason)) => Err(reason.clone()), - None => row.as_string(), - }); - - data_group(&value_list, &Some(block), name) - } - Grouper::ByColumn(column_name) => group(&column_name, &value_list, name), - }; - - Ok(PipelineData::Value(group_value?, None)) + Ok(PipelineData::Value(group_value, None)) } -#[allow(clippy::type_complexity)] -pub fn data_group( - values: &Value, - grouper: &Option Result + Send>>, +pub fn group_cell_path( + column_name: CellPath, + values: Vec, span: Span, ) -> Result { let mut groups: IndexMap> = IndexMap::new(); - for (idx, value) in values.clone().into_pipeline_data().into_iter().enumerate() { + for value in values.into_iter() { + let group_key = value + .clone() + .follow_cell_path(&column_name.members, false)?; + if matches!(group_key, Value::Nothing { .. }) { + continue; // likely the result of a failed optional access, ignore this value + } + + let group_key = group_key.as_string()?; + let group = groups.entry(group_key).or_default(); + group.push(value); + } + + let mut cols = vec![]; + let mut vals = vec![]; + + for (k, v) in groups { + cols.push(k.to_string()); + vals.push(Value::List { vals: v, span }); + } + + Ok(Value::Record { cols, vals, span }) +} + +pub fn group_no_grouper(values: Vec, span: Span) -> Result { + let mut groups: IndexMap> = IndexMap::new(); + + for value in values.into_iter() { + let group_key = value.as_string()?; + let group = groups.entry(group_key).or_default(); + group.push(value); + } + + let mut cols = vec![]; + let mut vals = vec![]; + + for (k, v) in groups { + cols.push(k.to_string()); + vals.push(Value::List { vals: v, span }); + } + + Ok(Value::Record { cols, vals, span }) +} + +// TODO: refactor this, it's a bit of a mess +fn group_closure( + values: &Vec, + span: Span, + block: Option, + stack: &mut Stack, + engine_state: &EngineState, + call: &Call, +) -> Result { + let error_key = "error"; + let mut keys: Vec> = vec![]; + let value_list = Value::List { + vals: values.clone(), + span, + }; + + for value in values { + if let Some(capture_block) = &block { + let mut stack = stack.captures_to_stack(&capture_block.captures); + let block = engine_state.get_block(capture_block.block_id); + let pipeline = eval_block( + engine_state, + &mut stack, + block, + value.clone().into_pipeline_data(), + call.redirect_stdout, + call.redirect_stderr, + ); + + match pipeline { + Ok(s) => { + let collection: Vec = s.into_iter().collect(); + + if collection.len() > 1 { + return Err(ShellError::GenericError( + "expected one value from the block".into(), + "requires a table with one value for grouping".into(), + Some(span), + None, + Vec::new(), + )); + } + + let value = match collection.get(0) { + Some(Value::Error { .. }) | None => Value::string(error_key, span), + Some(return_value) => return_value.clone(), + }; + + keys.push(value.as_string()); + } + Err(_) => { + keys.push(Ok(error_key.into())); + } + } + } + } + let map = keys; + let block = Box::new(move |idx: usize, row: &Value| match map.get(idx) { + Some(Ok(key)) => Ok(key.clone()), + Some(Err(reason)) => Err(reason.clone()), + None => row.as_string(), + }); + + let grouper = &Some(block); + let mut groups: IndexMap> = IndexMap::new(); + + for (idx, value) in value_list.into_pipeline_data().into_iter().enumerate() { let group_key = if let Some(ref grouper) = grouper { grouper(idx, &value) } else { @@ -227,48 +294,6 @@ pub fn data_group( Ok(Value::Record { cols, vals, span }) } -pub fn group( - column_name: &Option>, - values: &Value, - span: Span, -) -> Result { - let name = span; - - let grouper = if let Some(column_name) = column_name { - Grouper::ByColumn(Some(column_name.clone())) - } else { - Grouper::ByColumn(None) - }; - - match grouper { - Grouper::ByColumn(Some(column_name)) => { - let block = Box::new(move |_, row: &Value| { - if let Value::Error { error } = row { - return Err(*error.clone()); - }; - match row.get_data_by_key(&column_name.item) { - Some(group_key) => Ok(group_key.as_string()?), - None => Err(ShellError::CantFindColumn { - col_name: column_name.item.to_string(), - span: column_name.span, - src_span: row.expect_span(), - }), - } - }); - - data_group(values, &Some(block), name) - } - Grouper::ByColumn(None) => { - let block = Box::new(move |_, row: &Value| row.as_string()); - - data_group(values, &Some(block), name) - } - Grouper::ByBlock => Err(ShellError::NushellFailed { - msg: "Block not implemented: This should never happen.".into(), - }), - } -} - #[cfg(test)] mod test { use super::*; diff --git a/crates/nu-command/src/filters/split_by.rs b/crates/nu-command/src/filters/split_by.rs index 15fffa354e..4f989ac5b8 100644 --- a/crates/nu-command/src/filters/split_by.rs +++ b/crates/nu-command/src/filters/split_by.rs @@ -3,7 +3,8 @@ use nu_engine::CallExt; use nu_protocol::ast::Call; use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::{ - Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, + Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, + Type, Value, }; #[derive(Clone)] @@ -182,6 +183,36 @@ pub fn split( } } +#[allow(clippy::type_complexity)] +fn data_group( + values: &Value, + grouper: &Option Result + Send>>, + span: Span, +) -> Result { + let mut groups: IndexMap> = IndexMap::new(); + + for (idx, value) in values.clone().into_pipeline_data().into_iter().enumerate() { + let group_key = if let Some(ref grouper) = grouper { + grouper(idx, &value) + } else { + value.as_string() + }; + + let group = groups.entry(group_key?).or_default(); + group.push(value); + } + + let mut cols = vec![]; + let mut vals = vec![]; + + for (k, v) in groups { + cols.push(k.to_string()); + vals.push(Value::List { vals: v, span }); + } + + Ok(Value::Record { cols, vals, span }) +} + #[allow(clippy::type_complexity)] pub fn data_split( value: PipelineData, @@ -203,7 +234,7 @@ pub fn data_split( _, ) => { for (idx, list) in grouped_rows.iter().enumerate() { - match super::group_by::data_group(list, splitter, span) { + match data_group(list, splitter, span) { Ok(grouped) => { if let Value::Record { vals: li, diff --git a/crates/nu-command/tests/commands/group_by.rs b/crates/nu-command/tests/commands/group_by.rs index 0765ce2931..dba7afc733 100644 --- a/crates/nu-command/tests/commands/group_by.rs +++ b/crates/nu-command/tests/commands/group_by.rs @@ -72,7 +72,7 @@ fn errors_if_given_unknown_column_name() { } #[test] -fn errors_if_block_given_evaluates_more_than_one_row() { +fn errors_if_column_not_found() { Playground::setup("group_by_test_3", |dirs, sandbox| { sandbox.with_files(vec![FileWithContentToBeTrimmed( "los_tres_caballeros.csv", @@ -92,21 +92,19 @@ fn errors_if_block_given_evaluates_more_than_one_row() { "# )); - assert!(actual.err.contains("value originates here"),); - assert!(actual.err.contains("cannot find column"),); + assert!(actual.err.contains("did you mean 'type'"),); }) } #[test] fn errors_if_input_empty() { - Playground::setup("group_by_empty_test", |dirs, _sandbox| { - let actual = nu!( - cwd: dirs.test(), pipeline( - r#" - group-by date - "# - )); - - assert!(actual.err.contains("expected table from pipeline")); - }); + let actual = nu!("group-by date"); + assert!(actual.err.contains("expected table from pipeline")); +} + +#[test] +fn optional_cell_path_works() { + let actual = nu!("[{foo: 123}, {foo: 234}, {bar: 345}] | group-by foo? | to nuon"); + let expected = r#"{"123": [[foo]; [123]], "234": [[foo]; [234]]}"#; + assert_eq!(actual.out, expected) }