diff --git a/crates/nu-command/src/strings/split/column.rs b/crates/nu-command/src/strings/split/column.rs index 4aa03f0085..98837474d4 100644 --- a/crates/nu-command/src/strings/split/column.rs +++ b/crates/nu-command/src/strings/split/column.rs @@ -5,6 +5,7 @@ use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; +use regex::Regex; #[derive(Clone)] pub struct SubCommand; @@ -30,6 +31,7 @@ impl Command for SubCommand { "the character or string that denotes what separates columns", ) .switch("collapse-empty", "remove empty columns", Some('c')) + .switch("regex", "separator is a regular expression", Some('r')) .rest( "rest", SyntaxShape::String, @@ -117,6 +119,25 @@ impl Command for SubCommand { span: Span::test_data(), }), }, + Example { + description: "Split a list of strings into a table, ignoring padding", + example: r"['a - b' 'c - d'] | split column -r '\s*-\s*'", + result: Some(Value::List { + vals: vec![ + Value::Record { + cols: vec!["column1".to_string(), "column2".to_string()], + vals: vec![Value::test_string("a"), Value::test_string("b")], + span: Span::test_data(), + }, + Value::Record { + cols: vec!["column1".to_string(), "column2".to_string()], + vals: vec![Value::test_string("c"), Value::test_string("d")], + span: Span::test_data(), + }, + ], + span: Span::test_data(), + }), + }, ] } } @@ -132,30 +153,43 @@ fn split_column( let rest: Vec<Spanned<String>> = call.rest(engine_state, stack, 1)?; let collapse_empty = call.has_flag("collapse-empty"); + let regex = if call.has_flag("regex") { + Regex::new(&separator.item) + } else { + let escaped = regex::escape(&separator.item); + Regex::new(&escaped) + } + .map_err(|err| { + ShellError::GenericError( + "Error with regular expression".into(), + err.to_string(), + Some(separator.span), + None, + Vec::new(), + ) + })?; + input.flat_map( - move |x| split_column_helper(&x, &separator, &rest, collapse_empty, name_span), + move |x| split_column_helper(&x, &regex, 
&rest, collapse_empty, name_span), engine_state.ctrlc.clone(), ) } fn split_column_helper( v: &Value, - separator: &Spanned<String>, + separator: &Regex, rest: &[Spanned<String>], collapse_empty: bool, head: Span, ) -> Vec<Value> { if let Ok(s) = v.as_string() { - let split_result: Vec<_> = if collapse_empty { - s.split(&separator.item).filter(|s| !s.is_empty()).collect() - } else { - s.split(&separator.item).collect() - }; - + let split_result: Vec<_> = separator + .split(&s) + .filter(|x| !(collapse_empty && x.is_empty())) + .collect(); let positional: Vec<_> = rest.iter().map(|f| f.item.clone()).collect(); // If they didn't provide column names, make up our own - let mut cols = vec![]; let mut vals = vec![]; if positional.is_empty() { diff --git a/crates/nu-command/src/strings/split/list.rs b/crates/nu-command/src/strings/split/list.rs index d8aec8ecf7..8db4ef04df 100644 --- a/crates/nu-command/src/strings/split/list.rs +++ b/crates/nu-command/src/strings/split/list.rs @@ -5,6 +5,7 @@ use nu_protocol::{ Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; +use regex::Regex; #[derive(Clone)] pub struct SubCommand; @@ -25,6 +26,10 @@ impl Command for SubCommand { SyntaxShape::Any, "the value that denotes what separates the list", ) + .switch( + "regex", + "separator is a regular expression, matching values that can be coerced into a string", + Some('r')) .category(Category::Filters) } @@ -121,10 +126,76 @@ impl Command for SubCommand { span: Span::test_data(), }), }, + Example { + description: "Split a list of chars into lists based on multiple characters", + example: r"[a, b, c, d, a, e, f, g] | split list -r '(b|e)'", + result: Some(Value::List { + vals: vec![ + Value::List { + vals: vec![Value::test_string("a")], + span: Span::test_data(), + }, + Value::List { + vals: vec![ + Value::test_string("c"), + Value::test_string("d"), + Value::test_string("a"), + ], + span: Span::test_data(), + }, + Value::List { + vals: 
vec![Value::test_string("f"), Value::test_string("g")], + span: Span::test_data(), + }, + ], + span: Span::test_data(), + }), + }, ] } } +enum Matcher { + Regex(Regex), + Direct(Value), +} + +impl Matcher { + pub fn new(regex: bool, lhs: Value) -> Result<Self, ShellError> { + if regex { + Ok(Matcher::Regex(Regex::new(&lhs.as_string()?).map_err( + |err| { + ShellError::GenericError( + "Error with regular expression".into(), + err.to_string(), + match lhs { + Value::Error { error: _ } => None, + _ => Some(lhs.expect_span()), + }, + None, + Vec::new(), + ) + }, + )?)) + } else { + Ok(Matcher::Direct(lhs)) + } + } + + pub fn compare(&self, rhs: &Value) -> Result<bool, ShellError> { + Ok(match self { + Matcher::Regex(regex) => { + if let Ok(rhs_str) = rhs.as_string() { + regex.is_match(&rhs_str) + } else { + false + } + } + Matcher::Direct(lhs) => rhs == lhs, + }) + } +} + fn split_list( engine_state: &EngineState, stack: &mut Stack, @@ -134,9 +205,11 @@ fn split_list( let separator: Value = call.req(engine_state, stack, 0)?; let mut temp_list = Vec::new(); let mut returned_list = Vec::new(); + let iter = input.into_interruptible_iter(engine_state.ctrlc.clone()); + let matcher = Matcher::new(call.has_flag("regex"), separator)?; for val in iter { - if val == separator { + if matcher.compare(&val)? 
{ if !temp_list.is_empty() { returned_list.push(Value::List { vals: temp_list.clone(), diff --git a/crates/nu-command/src/strings/split/row.rs b/crates/nu-command/src/strings/split/row.rs index 3cfe8036d3..3dea82213d 100644 --- a/crates/nu-command/src/strings/split/row.rs +++ b/crates/nu-command/src/strings/split/row.rs @@ -5,7 +5,7 @@ use nu_protocol::{ Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; - +use regex::Regex; #[derive(Clone)] pub struct SubCommand; @@ -21,7 +21,7 @@ impl Command for SubCommand { .required( "separator", SyntaxShape::String, - "the character that denotes what separates rows", + "a character or regex that denotes what separates rows", ) .named( "number", @@ -29,6 +29,7 @@ impl Command for SubCommand { "Split into maximum number of items", Some('n'), ) + .switch("regex", "use regex syntax for separator", Some('r')) .category(Category::Strings) } @@ -92,6 +93,18 @@ impl Command for SubCommand { span: Span::test_data(), }), }, + Example { + description: "Split a string by regex", + example: r"'a b c' | split row -r '\s+'", + result: Some(Value::List { + vals: vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + ], + span: Span::test_data(), + }), + }, ] } } @@ -104,30 +117,40 @@ fn split_row( ) -> Result<PipelineData, ShellError> { let name_span = call.head; let separator: Spanned<String> = call.req(engine_state, stack, 0)?; + let regex = if call.has_flag("regex") { + Regex::new(&separator.item) + } else { + let escaped = regex::escape(&separator.item); + Regex::new(&escaped) + } + .map_err(|err| { + ShellError::GenericError( + "Error with regular expression".into(), + err.to_string(), + Some(separator.span), + None, + Vec::new(), + ) + })?; let max_split: Option<usize> = call.get_flag(engine_state, stack, "number")?; input.flat_map( - move |x| split_row_helper(&x, &separator, max_split, name_span), + move |x| split_row_helper(&x, &regex, max_split, name_span), engine_state.ctrlc.clone(), ) } -fn 
split_row_helper( - v: &Value, - separator: &Spanned<String>, - max_split: Option<usize>, - name: Span, -) -> Vec<Value> { +fn split_row_helper(v: &Value, regex: &Regex, max_split: Option<usize>, name: Span) -> Vec<Value> { match v.span() { Ok(v_span) => { if let Ok(s) = v.as_string() { match max_split { - Some(max_split) => s - .splitn(max_split, &separator.item) - .map(|s| Value::string(s, v_span)) + Some(max_split) => regex + .splitn(&s, max_split) + .map(|x: &str| Value::string(x, v_span)) .collect(), - None => s - .split(&separator.item) - .map(|s| Value::string(s, v_span)) + None => regex + .split(&s) + .map(|x: &str| Value::string(x, v_span)) .collect(), } } else { diff --git a/crates/nu-command/tests/commands/split_column.rs b/crates/nu-command/tests/commands/split_column.rs index 2b45263425..3a4d4c8f0b 100644 --- a/crates/nu-command/tests/commands/split_column.rs +++ b/crates/nu-command/tests/commands/split_column.rs @@ -5,12 +5,20 @@ use nu_test_support::{nu, pipeline}; #[test] fn to_column() { Playground::setup("split_column_test_1", |dirs, sandbox| { - sandbox.with_files(vec![FileWithContentToBeTrimmed( - "sample.txt", - r#" + sandbox.with_files(vec![ + FileWithContentToBeTrimmed( + "sample.txt", + r#" importer,shipper,tariff_item,name,origin "#, - )]); + ), + FileWithContentToBeTrimmed( + "sample2.txt", + r#" + importer , shipper , tariff_item , name , origin + "#, + ), + ]); let actual = nu!( cwd: dirs.test(), pipeline( @@ -24,5 +32,18 @@ fn to_column() { )); assert!(actual.out.contains("shipper")); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open sample2.txt + | lines + | str trim + | split column -r '\s*,\s*' + | get column2 + "# + )); + + assert!(actual.out.contains("shipper")); }) } diff --git a/crates/nu-command/tests/commands/split_row.rs b/crates/nu-command/tests/commands/split_row.rs index e7e7dd4f37..0517213108 100644 --- a/crates/nu-command/tests/commands/split_row.rs +++ b/crates/nu-command/tests/commands/split_row.rs @@ -5,12 +5,20 @@ use 
nu_test_support::{nu, pipeline}; #[test] fn to_row() { Playground::setup("split_row_test_1", |dirs, sandbox| { - sandbox.with_files(vec![FileWithContentToBeTrimmed( - "sample.txt", - r#" + sandbox.with_files(vec![ + FileWithContentToBeTrimmed( + "sample.txt", + r#" importer,shipper,tariff_item,name,origin "#, - )]); + ), + FileWithContentToBeTrimmed( + "sample2.txt", + r#" + importer , shipper , tariff_item,name , origin + "#, + ), + ]); let actual = nu!( cwd: dirs.test(), pipeline( @@ -24,5 +32,18 @@ fn to_row() { )); assert!(actual.out.contains('5')); + + let actual = nu!( + cwd: dirs.test(), pipeline( + r#" + open sample2.txt + | lines + | str trim + | split row -r '\s*,\s*' + | length + "# + )); + + assert!(actual.out.contains('5')); }) }