From 83fbf393b041db6b0d760d729a38c06eb6b396c6 Mon Sep 17 00:00:00 2001 From: 132ikl <132@ikl.sh> Date: Mon, 8 Jul 2024 21:03:08 -0400 Subject: [PATCH] Unify sort and sort_by logic --- crates/nu-command/src/filters/sort.rs | 241 ++++------------------- crates/nu-command/src/filters/sort_by.rs | 2 +- crates/nu-command/src/sort_utils.rs | 169 ++++++++++------ 3 files changed, 138 insertions(+), 274 deletions(-) diff --git a/crates/nu-command/src/filters/sort.rs b/crates/nu-command/src/filters/sort.rs index 965b997355..f8adca7cd4 100644 --- a/crates/nu-command/src/filters/sort.rs +++ b/crates/nu-command/src/filters/sort.rs @@ -1,9 +1,12 @@ use alphanumeric_sort::compare_str; use nu_engine::command_prelude::*; +use nu_protocol::{ast::PathMember, IntoValue}; use nu_utils::IgnoreCaseExt; use std::cmp::Ordering; +use crate::{compare_by, compare_values, Comparator}; + #[derive(Clone)] pub struct Sort; @@ -14,10 +17,13 @@ impl Command for Sort { fn signature(&self) -> nu_protocol::Signature { Signature::build("sort") - .input_output_types(vec![( - Type::List(Box::new(Type::Any)), - Type::List(Box::new(Type::Any)), - ), (Type::record(), Type::record()),]) + .input_output_types(vec![ + ( + Type::List(Box::new(Type::Any)), + Type::List(Box::new(Type::Any)) + ), + (Type::record(), Type::record()) + ]) .switch("reverse", "Sort in reverse order", Some('r')) .switch( "ignore-case", @@ -134,233 +140,52 @@ impl Command for Sort { call: &Call, input: PipelineData, ) -> Result { - let head = call.head; let reverse = call.has_flag(engine_state, stack, "reverse")?; let insensitive = call.has_flag(engine_state, stack, "ignore-case")?; let natural = call.has_flag(engine_state, stack, "natural")?; + let sort_by_value = call.has_flag(engine_state, stack, "values")?; let metadata = input.metadata(); let span = input.span().unwrap_or(call.head); - match input { - // Records have two sorting methods, toggled by presence or absence of -v - PipelineData::Value(Value::Record { val, .. }, ..) => { - let sort_by_value = call.has_flag(engine_state, stack, "values")?; - let record = sort_record( + let value = input.into_value(span)?; + let sorted: Value = match value { + Value::Record { val, .. } => { + // Records have two sorting methods, toggled by presence or absence of -v + let record = crate::sort_record( val.into_owned(), - span, sort_by_value, reverse, insensitive, natural, - ); - Ok(record.into_pipeline_data()) + )?; + Value::record(record, span) } - // Other values are sorted here - PipelineData::Value(v, ..) - if !matches!(v, Value::List { .. } | Value::Range { .. }) => - { - Ok(v.into_pipeline_data()) - } - pipe_data => { - let mut vec: Vec<_> = pipe_data.into_iter().collect(); + Value::List { vals, .. } => { + let mut vec = vals.to_owned(); - sort(&mut vec, head, insensitive, natural)?; + crate::sort(&mut vec, insensitive, natural)?; if reverse { vec.reverse() } - let iter = vec.into_iter(); - Ok(iter.into_pipeline_data_with_metadata( - head, - engine_state.ctrlc.clone(), - metadata, - )) + Value::list(vec, span) } - } - } -} - -fn sort_record( - record: Record, - rec_span: Span, - sort_by_value: bool, - reverse: bool, - insensitive: bool, - natural: bool, -) -> Value { - let mut input_pairs: Vec<(String, Value)> = record.into_iter().collect(); - input_pairs.sort_by(|a, b| { - // Extract the data (if sort_by_value) or the column names for comparison - let left_res = if sort_by_value { - match &a.1 { - Value::String { val, .. } => val.clone(), - val => { - if let Ok(val) = val.coerce_string() { - val - } else { - // Values that can't be turned to strings are disregarded by the sort - // (same as in sort_utils.rs) - return Ordering::Equal; - } - } + Value::Nothing { .. } => { + return Err(ShellError::PipelineEmpty { + dst_span: value.span(), + }) } - } else { - a.0.clone() - }; - let right_res = if sort_by_value { - match &b.1 { - Value::String { val, .. } => val.clone(), - val => { - if let Ok(val) = val.coerce_string() { - val - } else { - // Values that can't be turned to strings are disregarded by the sort - // (same as in sort_utils.rs) - return Ordering::Equal; - } - } + _ => { + return Err(ShellError::PipelineMismatch { + exp_input_type: "record or list".to_string(), + dst_span: call.head, + src_span: value.span(), + }) } - } else { - b.0.clone() }; - - // Fold case if case-insensitive - let left = if insensitive { - left_res.to_folded_case() - } else { - left_res - }; - let right = if insensitive { - right_res.to_folded_case() - } else { - right_res - }; - - if natural { - compare_str(left, right) - } else { - left.cmp(&right) - } - }); - - if reverse { - input_pairs.reverse(); + Ok(sorted.into_pipeline_data_with_metadata(metadata)) } - - Value::record(input_pairs.into_iter().collect(), rec_span) -} - -pub fn sort( - vec: &mut [Value], - span: Span, - insensitive: bool, - natural: bool, -) -> Result<(), ShellError> { - match vec.first() { - Some(Value::Record { val, .. }) => { - let columns: Vec = val.columns().cloned().collect(); - vec.sort_by(|a, b| process(a, b, &columns, span, insensitive, natural)); - } - _ => { - vec.sort_by(|a, b| { - let span_a = a.span(); - let span_b = b.span(); - if insensitive { - let folded_left = match a { - Value::String { val, .. } => Value::string(val.to_folded_case(), span_a), - _ => a.clone(), - }; - - let folded_right = match b { - Value::String { val, .. } => Value::string(val.to_folded_case(), span_b), - _ => b.clone(), - }; - - if natural { - match ( - folded_left.coerce_into_string(), - folded_right.coerce_into_string(), - ) { - (Ok(left), Ok(right)) => compare_str(left, right), - _ => Ordering::Equal, - } - } else { - folded_left - .partial_cmp(&folded_right) - .unwrap_or(Ordering::Equal) - } - } else if natural { - match (a.coerce_str(), b.coerce_str()) { - (Ok(left), Ok(right)) => compare_str(left, right), - _ => Ordering::Equal, - } - } else { - a.partial_cmp(b).unwrap_or(Ordering::Equal) - } - }); - } - } - Ok(()) -} - -pub fn process( - left: &Value, - right: &Value, - columns: &[String], - span: Span, - insensitive: bool, - natural: bool, -) -> Ordering { - for column in columns { - let left_value = left.get_data_by_key(column); - - let left_res = match left_value { - Some(left_res) => left_res, - None => Value::nothing(span), - }; - - let right_value = right.get_data_by_key(column); - - let right_res = match right_value { - Some(right_res) => right_res, - None => Value::nothing(span), - }; - - let result = if insensitive { - let span_left = left_res.span(); - let span_right = right_res.span(); - let folded_left = match left_res { - Value::String { val, .. } => Value::string(val.to_folded_case(), span_left), - _ => left_res, - }; - - let folded_right = match right_res { - Value::String { val, .. } => Value::string(val.to_folded_case(), span_right), - _ => right_res, - }; - if natural { - match ( - folded_left.coerce_into_string(), - folded_right.coerce_into_string(), - ) { - (Ok(left), Ok(right)) => compare_str(left, right), - _ => Ordering::Equal, - } - } else { - folded_left - .partial_cmp(&folded_right) - .unwrap_or(Ordering::Equal) - } - } else { - left_res.partial_cmp(&right_res).unwrap_or(Ordering::Equal) - }; - if result != Ordering::Equal { - return result; - } - } - - Ordering::Equal } #[cfg(test)] diff --git a/crates/nu-command/src/filters/sort_by.rs b/crates/nu-command/src/filters/sort_by.rs index 261e3051bd..daa05ce3f6 100644 --- a/crates/nu-command/src/filters/sort_by.rs +++ b/crates/nu-command/src/filters/sort_by.rs @@ -125,7 +125,7 @@ impl Command for SortBy { } } - crate::sort(&mut vec, comparators, head, insensitive, natural)?; + crate::sort_by(&mut vec, comparators, head, insensitive, natural)?; if reverse { vec.reverse() diff --git a/crates/nu-command/src/sort_utils.rs b/crates/nu-command/src/sort_utils.rs index 79b114df38..fde7dddb5d 100644 --- a/crates/nu-command/src/sort_utils.rs +++ b/crates/nu-command/src/sort_utils.rs @@ -3,7 +3,7 @@ use nu_engine::ClosureEval; use nu_protocol::{ ast::CellPath, engine::{Closure, EngineState, Stack}, - PipelineData, ShellError, Span, Value, + PipelineData, Record, ShellError, Span, Value, }; use nu_utils::IgnoreCaseExt; use std::cmp::Ordering; @@ -17,56 +17,33 @@ pub enum Comparator { CellPath(CellPath), } -/// Sort a value. This only makes sense for lists and list-like things, -/// so for everything else we just return the value as-is. -/// CustomValues are converted to their base value and then sorted. -pub fn sort_value( - val: &Value, - comparators: Vec, - ascending: bool, - insensitive: bool, - natural: bool, -) -> Result { - let span = val.span(); - match val { - Value::List { vals, .. } => { - let mut vals = vals.clone(); - sort(&mut vals, comparators, span, insensitive, natural)?; +pub fn sort(vec: &mut [Value], insensitive: bool, natural: bool) -> Result<(), ShellError> { + // to apply insensitive or natural sorting, all values must be strings + let string_sort: bool = vec + .iter() + .all(|value| matches!(value, &Value::String { .. })); - if !ascending { - vals.reverse(); - } + // allow the comparator function to indicate error + // by mutating this option captured by the closure, + // since sort_by closure must be infallible + let mut compare_err: Option = None; - Ok(Value::list(vals, span)) - } - Value::Custom { val, .. } => { - let base_val = val.to_base_value(span)?; - sort_value(&base_val, comparators, ascending, insensitive, natural) - } - _ => Ok(val.to_owned()), + vec.sort_by(|a, b| { + crate::compare_values(a, b, insensitive && string_sort, natural && string_sort) + .unwrap_or_else(|err| { + compare_err.get_or_insert(err); + Ordering::Equal + }) + }); + + if let Some(err) = compare_err { + Err(err) + } else { + Ok(()) } } -/// Sort a value in-place. This is more efficient than sort_value() because it -/// avoids cloning, but it does not work for CustomValues; they are returned as-is. -pub fn sort_value_in_place( - val: &mut Value, - comparators: Vec, - ascending: bool, - insensitive: bool, - natural: bool, -) -> Result<(), ShellError> { - let span = val.span(); - if let Value::List { vals, .. } = val { - sort(vals, comparators, span, insensitive, natural)?; - if !ascending { - vals.reverse(); - } - } - Ok(()) -} - -pub fn sort( +pub fn sort_by( vec: &mut [Value], comparators: Vec, span: Span, @@ -102,7 +79,7 @@ pub fn sort( let mut compare_err: Option = None; vec.sort_by(|a, b| { - compare( + compare_by( a, b, &comparators, @@ -120,7 +97,43 @@ pub fn sort( } } -pub fn compare( +pub fn sort_record( + record: Record, + sort_by_value: bool, + reverse: bool, + insensitive: bool, + natural: bool, +) -> Result { + let mut input_pairs: Vec<(String, Value)> = record.into_iter().collect(); + + // allow the comparator function to indicate error + // by mutating this option captured by the closure, + // since sort_by closure must be infallible + let mut compare_err: Option = None; + + input_pairs.sort_by(|a, b| { + if sort_by_value { + compare_values(&a.1, &b.1, insensitive, natural).unwrap_or_else(|err| { + compare_err.get_or_insert(err); + Ordering::Equal + }) + } else { + compare_strings(&a.0, &b.0, insensitive, natural) + } + }); + + if reverse { + input_pairs.reverse() + } + + if let Some(err) = compare_err { + return Err(err); + } + + Ok(input_pairs.into_iter().collect()) +} + +pub fn compare_by( left: &Value, right: &Value, comparators: &[Comparator], @@ -153,6 +166,48 @@ pub fn compare( Ordering::Equal } +pub fn compare_values( + left: &Value, + right: &Value, + insensitive: bool, + natural: bool, +) -> Result { + if insensitive || natural { + let left_str = left.coerce_string()?; + let right_str = right.coerce_string()?; + Ok(compare_strings(&left_str, &right_str, insensitive, natural)) + } else { + Ok(left.partial_cmp(&right).unwrap_or(Ordering::Equal)) + } +} + +pub fn compare_strings( + left: &String, + right: &String, + insensitive: bool, + natural: bool, +) -> Ordering { + // declare these names now to appease compiler + // not needed in nightly, but needed as of 1.77.2, so can be removed later + let (left_copy, right_copy); + + // only allocate new String if necessary for case folding, + // so callers don't need to pass an owned String + let (left_str, right_str) = if insensitive { + left_copy = left.to_folded_case(); + right_copy = right.to_folded_case(); + (&left_copy, &right_copy) + } else { + (left, right) + }; + + if natural { + alphanumeric_sort::compare_str(left_str, right_str) + } else { + left_str.partial_cmp(right_str).unwrap_or(Ordering::Equal) + } +} + pub fn compare_cell_path( left: &Value, right: &Value, @@ -162,23 +217,7 @@ pub fn compare_cell_path( ) -> Result { let left = left.clone().follow_cell_path(&cell_path.members, false)?; let right = right.clone().follow_cell_path(&cell_path.members, false)?; - - if insensitive || natural { - let mut left_str = left.coerce_into_string()?; - let mut right_str = right.coerce_into_string()?; - if insensitive { - left_str = left_str.to_folded_case(); - right_str = right_str.to_folded_case(); - } - - if natural { - Ok(compare_str(left_str, right_str)) - } else { - Ok(left_str.partial_cmp(&right_str).unwrap_or(Ordering::Equal)) - } - } else { - Ok(left.partial_cmp(&right).unwrap_or(Ordering::Equal)) - } + compare_values(&left, &right, insensitive, natural) } pub fn compare_closure(