From cb754befe91d6ba3e853004fe90ff3a1af7caa9b Mon Sep 17 00:00:00 2001 From: Hudson Clark Date: Tue, 24 Oct 2023 16:54:26 -0400 Subject: [PATCH] fix: Ensure consistent vals and cols when parsing with `--flexible` (#10814) # Description `from tsv` and `from csv` both support a `--flexible` flag. This flag can be used to "allow the number of fields in records to be variable". Previously, a record's invariant that `rec.cols.len() == rec.vals.len()` could be broken during parsing. This can cause runtime errors as in #10693. Other commands, like `select` were also affected. The inconsistencies are somewhat hard to see, as most nushell code assumes an equal number of columns and values. # Before ### Fewer values than columns ```nushell > let record = (echo "one,two\n1" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # But only one value > $record | values | to nuon [1] # And printing the record doesn't show the second column! > $record | to nuon {one: 1} ``` ### More values than columns ```nushell > let record = (echo "one,two\n1,2,3" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # But three values > $record | values | to nuon [1, 2, 3] # And printing the record doesn't show the third value! > $record | to nuon {one: 1, two: 2} ``` # After ### Fewer values than columns ```nushell > let record = (echo "one,two\n1" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # And a matching number of values > $record | values | to nuon [1, null] # And printing the record works as expected > $record | to nuon {one: 1, two: null} ``` ### More values than columns ```nushell > let record = (echo "one,two\n1,2,3" | from csv --flexible | first) # There are two columns > $record | columns | to nuon [one, two] # And a matching number of values > $record | values | to nuon [1, 2] # And printing the record works as expected > $record | to nuon {one: 1, two: 2} ``` # User-Facing Changes Using the `--flexible` flag with `from csv` and `from tsv` will not result in corrupted record state. # Tests + Formatting # After Submitting --- .../nu-command/src/formats/from/delimited.rs | 32 +++++++++++-------- .../tests/format_conversions/csv.rs | 20 ++++++++++++ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/crates/nu-command/src/formats/from/delimited.rs b/crates/nu-command/src/formats/from/delimited.rs index 1a6c63d4b7..33e744fe26 100644 --- a/crates/nu-command/src/formats/from/delimited.rs +++ b/crates/nu-command/src/formats/from/delimited.rs @@ -35,21 +35,25 @@ fn from_delimited_string_to_value( let mut rows = vec![]; for row in reader.records() { - let mut output_row = vec![]; - for value in row?.iter() { - if no_infer { - output_row.push(Value::string(value.to_string(), span)); - continue; - } + let row = row?; + let output_row = (0..headers.len()) + .map(|i| { + row.get(i) + .map(|value| { + if no_infer { + Value::string(value.to_string(), span) + } else if let Ok(i) = value.parse::() { + Value::int(i, span) + } else if let Ok(f) = value.parse::() { + Value::float(f, span) + } else { + Value::string(value.to_string(), span) + } + }) + .unwrap_or(Value::nothing(span)) + }) + .collect::>(); - if let Ok(i) = value.parse::() { - output_row.push(Value::int(i, span)); - } else if let Ok(f) = value.parse::() { - output_row.push(Value::float(f, span)); - } else { - output_row.push(Value::string(value.to_string(), span)); - } - } rows.push(Value::record( Record { cols: headers.clone(), diff --git a/crates/nu-command/tests/format_conversions/csv.rs b/crates/nu-command/tests/format_conversions/csv.rs index 6feff39d70..a10ebeb14c 100644 --- a/crates/nu-command/tests/format_conversions/csv.rs +++ b/crates/nu-command/tests/format_conversions/csv.rs @@ -460,3 +460,23 @@ fn parses_csv_with_unicode_x1f_sep() { assert_eq!(actual.out, "3"); }) } + +#[test] +fn from_csv_test_flexible_extra_vals() { + let actual = nu!(pipeline( + r#" + echo "a,b\n1,2,3" | from csv --flexible | first | values | to nuon + "# + )); + assert_eq!(actual.out, "[1, 2]"); +} + +#[test] +fn from_csv_test_flexible_missing_vals() { + let actual = nu!(pipeline( + r#" + echo "a,b\n1" | from csv --flexible | first | values | to nuon + "# + )); + assert_eq!(actual.out, "[1, null]"); +}