From bce6f5a3e6043cc93b18b4f8687709003d4ddd1e Mon Sep 17 00:00:00 2001 From: siedentop <284732+siedentop@users.noreply.github.com> Date: Sat, 20 Jun 2020 17:22:06 -0700 Subject: [PATCH] Uniq: `--count` flag to count occurrences (#2017) * uniq: Add counting option (WIP!) Usage: fetch https://raw.githubusercontent.com/timbray/topfew/master/test/data/access-1k | lines | wrap item | uniq | sort-by count | last 10 * uniq: Add first test * uniq: Re-enable the non-counting variant. * uniq: Also handle primitive lines. * uniq: Update documentation * uniq: Final comment about error handling. Let's get some feedback * uniq: Address review comments. Not happy with the way I create a TypeError. There must be a cleaner way. Anyway, good for shipping. * uniq: Use Labeled_error as suggested by jturner in chat. * uniq: Return error directly. Co-authored-by: Christoph Siedentop --- crates/nu-cli/src/commands/uniq.rs | 69 ++++++++++++++++++++++++---- crates/nu-cli/tests/commands/uniq.rs | 23 ++++++++++ docs/commands/uniq.md | 13 ++++++ 3 files changed, 95 insertions(+), 10 deletions(-) diff --git a/crates/nu-cli/src/commands/uniq.rs b/crates/nu-cli/src/commands/uniq.rs index 4f024188d1..f4c5831434 100644 --- a/crates/nu-cli/src/commands/uniq.rs +++ b/crates/nu-cli/src/commands/uniq.rs @@ -1,9 +1,9 @@ use crate::commands::WholeStreamCommand; use crate::context::CommandRegistry; use crate::prelude::*; -use indexmap::set::IndexSet; +use indexmap::map::IndexMap; use nu_errors::ShellError; -use nu_protocol::{ReturnSuccess, Signature}; +use nu_protocol::Signature; pub struct Uniq; @@ -14,7 +14,7 @@ impl WholeStreamCommand for Uniq { } fn signature(&self) -> Signature { - Signature::build("uniq") + Signature::build("uniq").switch("count", "Count the unique rows", Some('c')) } fn usage(&self) -> &str { @@ -30,17 +30,66 @@ impl WholeStreamCommand for Uniq { } } -async fn uniq(args: CommandArgs, _registry: &CommandRegistry) -> Result<OutputStream, ShellError> { +async fn uniq(args: CommandArgs, registry: 
&CommandRegistry) -> Result<OutputStream, ShellError> { + let args = args.evaluate_once(&registry).await?; + let should_show_count = args.has("count"); let input = args.input; - let uniq_values: IndexSet<_> = input.collect().await; + let uniq_values = { + let mut counter = IndexMap::<nu_protocol::Value, usize>::new(); + for line in input.into_vec().await { + *counter.entry(line).or_insert(0) += 1; + } + counter + }; let mut values_vec_deque = VecDeque::new(); - for item in uniq_values - .iter() - .map(|row| ReturnSuccess::value(row.clone())) - { - values_vec_deque.push_back(item); + if should_show_count { + for item in uniq_values { + use nu_protocol::{UntaggedValue, Value}; + let value = { + match item.0.value { + UntaggedValue::Row(mut row) => { + row.entries.insert( + "count".to_string(), + UntaggedValue::int(item.1).into_untagged_value(), + ); + Value { + value: UntaggedValue::Row(row), + tag: item.0.tag, + } + } + UntaggedValue::Primitive(p) => { + let mut map = IndexMap::<String, Value>::new(); + map.insert( + "value".to_string(), + UntaggedValue::Primitive(p).into_untagged_value(), + ); + map.insert( + "count".to_string(), + UntaggedValue::int(item.1).into_untagged_value(), + ); + Value { + value: UntaggedValue::row(map), + tag: item.0.tag, + } + } + UntaggedValue::Table(_) => { + return Err(ShellError::labeled_error( + "uniq -c cannot operate on tables.", + "source", + item.0.tag.span, + )) + } + UntaggedValue::Error(_) | UntaggedValue::Block(_) => item.0, + } + }; + values_vec_deque.push_back(value); + } + } else { + for item in uniq_values { + values_vec_deque.push_back(item.0); + } } Ok(futures::stream::iter(values_vec_deque).to_output_stream()) diff --git a/crates/nu-cli/tests/commands/uniq.rs b/crates/nu-cli/tests/commands/uniq.rs index 465741c292..6449918785 100644 --- a/crates/nu-cli/tests/commands/uniq.rs +++ b/crates/nu-cli/tests/commands/uniq.rs @@ -140,3 +140,26 @@ fn uniq_when_keys_out_of_order() { assert_eq!(actual.out, "1"); } + +#[test] +fn uniq_counting() { + let actual = nu!( + cwd: "tests/fixtures/formats", 
pipeline( + r#" + echo '["A", "B", "A"]' + | from json + | wrap item + | uniq --count + "# + )); + let expected = nu!( + cwd: "tests/fixtures/formats", pipeline( + r#" + echo '[{"item": "A", "count": 2}, {"item": "B", "count": 1}]' + | from json + "# + )); + print!("{}", actual.out); + print!("{}", expected.out); + assert_eq!(actual.out, expected.out); +} diff --git a/docs/commands/uniq.md b/docs/commands/uniq.md index ce31db1bf3..b38a1cf696 100644 --- a/docs/commands/uniq.md +++ b/docs/commands/uniq.md @@ -34,3 +34,16 @@ Yehuda,Katz,10/11/2013,A 1 │ B ━━━┷━━━━━━━━━ ``` + +### Counting +`--count` or `-c` is the flag to output a `count` column. + +``` +> `open test.csv | get type | uniq -c` +───┬───────┬─────── + # │ value │ count +───┼───────┼─────── + 0 │ A │ 3 + 1 │ B │ 2 +───┴───────┴─────── +``` \ No newline at end of file