diff --git a/Cargo.lock b/Cargo.lock index 93bd02d3fa..9a32c2035f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2957,6 +2957,7 @@ dependencies = [ "trash", "umask", "unicode-segmentation", + "unicode-width", "ureq", "url", "uu_cp", diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 2edb8f0571..ac550dd819 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -97,6 +97,7 @@ uuid = { workspace = true, features = ["v4"] } v_htmlescape = { workspace = true } wax = { workspace = true } which = { workspace = true, optional = true } +unicode-width = { workspace = true } [target.'cfg(windows)'.dependencies] winreg = { workspace = true } diff --git a/crates/nu-command/src/strings/detect_columns.rs b/crates/nu-command/src/strings/detect_columns.rs index fde5f17f72..0d87ee187c 100644 --- a/crates/nu-command/src/strings/detect_columns.rs +++ b/crates/nu-command/src/strings/detect_columns.rs @@ -1,3 +1,5 @@ +use nu_protocol::IntoPipelineData; +use std::io::Cursor; use std::iter::Peekable; use std::str::CharIndices; @@ -36,6 +38,7 @@ impl Command for DetectColumns { "columns to be combined; listed as a range", Some('c'), ) + .switch("legacy", "use another algorithm to detect columns, it may be useful if default one doesn't work", None) .category(Category::Strings) } @@ -54,14 +57,32 @@ impl Command for DetectColumns { call: &Call, input: PipelineData, ) -> Result { - detect_columns(engine_state, stack, call, input) + if !call.has_flag(engine_state, stack, "legacy")? 
{ + guess_width(engine_state, stack, call, input) + } else { + detect_columns_legacy(engine_state, stack, call, input) + } } fn examples(&self) -> Vec { vec![ Example { - description: "Splits string across multiple columns", - example: "'a b c' | detect columns --no-headers", + description: "detect columns by df output", + example: r" +'Filesystem 1K-blocks Used Available Use% Mounted on +none 8150224 4 8150220 1% /mnt/c' | detect columns", + result: Some(Value::test_list(vec![Value::test_record(record! { + "Filesystem" => Value::test_string("none"), + "1K-blocks" => Value::test_string("8150224"), + "Used" => Value::test_string("4"), + "Available" => Value::test_string("8150220"), + "Use%" => Value::test_string("1%"), + "Mounted on" => Value::test_string("/mnt/c") + })])), + }, + Example { + description: "Use --legacy parameter if you find default one does not work", + example: "'a b c' | detect columns --legacy --no-headers", result: Some(Value::test_list(vec![Value::test_record(record! { "column0" => Value::test_string("a"), "column1" => Value::test_string("b"), @@ -71,19 +92,19 @@ impl Command for DetectColumns { Example { description: "", example: - "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 0..1", + "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 0..1 --legacy", result: None, }, Example { description: "Splits a multi-line string into columns with headers detected", example: - "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns -2..-1", + "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns -2..-1 --legacy", result: None, }, Example { description: "Splits a multi-line string into columns with headers detected", example: - "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 2..", + "$'c1 c2 c3 c4 c5(char nl)a b c d e' | detect columns --combine-columns 2.. 
--legacy", result: None, }, Example { @@ -95,7 +116,84 @@ impl Command for DetectColumns { } } -fn detect_columns( +fn guess_width( + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, +) -> Result { + use super::guess_width::GuessWidth; + let input_span = input.span().unwrap_or(call.head); + + let mut input = input.collect_string("", engine_state.get_config())?; + let num_rows_to_skip: Option = call.get_flag(engine_state, stack, "skip")?; + if let Some(rows) = num_rows_to_skip { + input = input.lines().skip(rows).map(|x| x.to_string()).join("\n"); + } + + let mut guess_width = GuessWidth::new_reader(Box::new(Cursor::new(input))); + let noheader = call.has_flag(engine_state, stack, "no-headers")?; + + let result = guess_width.read_all(); + + if result.is_empty() { + return Ok(Value::nothing(input_span).into_pipeline_data()); + } + let range: Option = call.get_flag(engine_state, stack, "combine-columns")?; + if !noheader { + let columns = result[0].clone(); + Ok(result + .into_iter() + .skip(1) + .map(move |s| { + let mut values: Vec = s + .into_iter() + .map(|v| Value::string(v, input_span)) + .collect(); + // some rows may has less columns, fill it with "" + for _ in values.len()..columns.len() { + values.push(Value::string("", input_span)); + } + let record = + Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span); + match record { + Ok(r) => match &range { + Some(range) => merge_record(r, range, input_span), + None => Value::record(r, input_span), + }, + Err(e) => Value::error(e, input_span), + } + }) + .into_pipeline_data(engine_state.ctrlc.clone())) + } else { + let length = result[0].len(); + let columns: Vec = (0..length).map(|n| format!("column{n}")).collect(); + Ok(result + .into_iter() + .map(move |s| { + let mut values: Vec = s + .into_iter() + .map(|v| Value::string(v, input_span)) + .collect(); + // some rows may has less columns, fill it with "" + for _ in values.len()..columns.len() { + 
values.push(Value::string("", input_span)); + } + let record = + Record::from_raw_cols_vals(columns.clone(), values, input_span, input_span); + match record { + Ok(r) => match &range { + Some(range) => merge_record(r, range, input_span), + None => Value::record(r, input_span), + }, + Err(e) => Value::error(e, input_span), + } + }) + .into_pipeline_data(engine_state.ctrlc.clone())) + } +} + +fn detect_columns_legacy( engine_state: &EngineState, stack: &mut Stack, call: &Call, @@ -180,64 +278,9 @@ fn detect_columns( } } - let (start_index, end_index) = if let Some(range) = &range { - match nu_cmd_base::util::process_range(range) { - Ok((l_idx, r_idx)) => { - let l_idx = if l_idx < 0 { - record.len() as isize + l_idx - } else { - l_idx - }; - - let r_idx = if r_idx < 0 { - record.len() as isize + r_idx - } else { - r_idx - }; - - if !(l_idx <= r_idx && (r_idx >= 0 || l_idx < (record.len() as isize))) - { - return Value::record(record, name_span); - } - - ( - l_idx.max(0) as usize, - (r_idx as usize + 1).min(record.len()), - ) - } - Err(processing_error) => { - let err = processing_error("could not find range index", name_span); - return Value::error(err, name_span); - } - } - } else { - return Value::record(record, name_span); - }; - - let (mut cols, mut vals): (Vec<_>, Vec<_>) = record.into_iter().unzip(); - - // Merge Columns - ((start_index + 1)..(cols.len() - end_index + start_index + 1)).for_each(|idx| { - cols.swap(idx, end_index - start_index - 1 + idx); - }); - cols.truncate(cols.len() - end_index + start_index + 1); - - // Merge Values - let combined = vals - .iter() - .take(end_index) - .skip(start_index) - .map(|v| v.coerce_str().unwrap_or_default()) - .join(" "); - let binding = Value::string(combined, Span::unknown()); - let last_seg = vals.split_off(end_index); - vals.truncate(start_index); - vals.push(binding); - vals.extend(last_seg); - - match Record::from_raw_cols_vals(cols, vals, Span::unknown(), name_span) { - Ok(record) => Value::record(record, 
name_span), - Err(err) => Value::error(err, name_span), + match &range { + Some(range) => merge_record(record, range, name_span), + None => Value::record(record, name_span), } }) .into_pipeline_data(ctrlc)) @@ -401,6 +444,80 @@ fn baseline(src: &mut Input) -> Spanned { } } +fn merge_record(record: Record, range: &Range, input_span: Span) -> Value { + let (start_index, end_index) = match process_range(range, record.len(), input_span) { + Ok(Some((l_idx, r_idx))) => (l_idx, r_idx), + Ok(None) => return Value::record(record, input_span), + Err(e) => return Value::error(e, input_span), + }; + + match merge_record_impl(record, start_index, end_index, input_span) { + Ok(rec) => Value::record(rec, input_span), + Err(err) => Value::error(err, input_span), + } +} + +fn process_range( + range: &Range, + length: usize, + input_span: Span, +) -> Result, ShellError> { + match nu_cmd_base::util::process_range(range) { + Ok((l_idx, r_idx)) => { + let l_idx = if l_idx < 0 { + length as isize + l_idx + } else { + l_idx + }; + + let r_idx = if r_idx < 0 { + length as isize + r_idx + } else { + r_idx + }; + + if !(l_idx <= r_idx && (r_idx >= 0 || l_idx < (length as isize))) { + return Ok(None); + } + + Ok(Some(( + l_idx.max(0) as usize, + (r_idx as usize + 1).min(length), + ))) + } + Err(processing_error) => Err(processing_error("could not find range index", input_span)), + } +} + +fn merge_record_impl( + record: Record, + start_index: usize, + end_index: usize, + input_span: Span, +) -> Result { + let (mut cols, mut vals): (Vec<_>, Vec<_>) = record.into_iter().unzip(); + // Merge Columns + ((start_index + 1)..(cols.len() - end_index + start_index + 1)).for_each(|idx| { + cols.swap(idx, end_index - start_index - 1 + idx); + }); + cols.truncate(cols.len() - end_index + start_index + 1); + + // Merge Values + let combined = vals + .iter() + .take(end_index) + .skip(start_index) + .map(|v| v.coerce_str().unwrap_or_default()) + .join(" "); + let binding = Value::string(combined, 
Span::unknown()); + let last_seg = vals.split_off(end_index); + vals.truncate(start_index); + vals.push(binding); + vals.extend(last_seg); + + Record::from_raw_cols_vals(cols, vals, Span::unknown(), input_span) +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/nu-command/src/strings/guess_width.rs b/crates/nu-command/src/strings/guess_width.rs new file mode 100644 index 0000000000..59cfbcb2cf --- /dev/null +++ b/crates/nu-command/src/strings/guess_width.rs @@ -0,0 +1,464 @@ +/// Attribution: https://github.com/noborus/guesswidth/blob/main/guesswidth.go +/// The MIT License (MIT) as of 2024-03-22 +/// +/// GuessWidth handles the format as formatted by printf. +/// Spaces exist as delimiters, but spaces are not always delimiters. +/// The width seems to be a fixed length, but it doesn't always fit. +/// GuessWidth finds the column separation position +/// from the reference line(header) and multiple lines(body). + +/// Briefly, the algorithm uses a histogram of spaces to find widths. +/// blanks, lines, and pos are variables used in the algorithm. The other +/// items names below are just for reference. +/// blanks = 0000003000113333111100003000 +/// lines = " PID TTY TIME CMD" +/// "302965 pts/3 00:00:11 zsh" +/// "709737 pts/3 00:00:00 ps" +/// +/// measure= "012345678901234567890123456789" +/// spaces = " ^ ^ ^" +/// pos = 6 15 24 <- the carets show these positions +/// the items in pos map to 3's in the blanks array + +/// Now that we have pos, we can let split() use this pos array to figure out +/// how to split all lines by comparing each index to see if there's a space. +/// So, it looks at position 6, 15, 24 and sees if it has a space in those +/// positions. If it does, it splits the line there. If it doesn't, it wiggles +/// around the position to find the next space and splits there. +use std::io::{self, BufRead}; +use unicode_width::UnicodeWidthStr; + +/// the number to scan to analyze. 
+const SCAN_NUM: u8 = 128;
+/// the minimum number of lines (header included) that must have a blank in the
+/// same cell for that cell to be recognized as a separator.
+/// 1 if only the header, 2 or more if there is a blank in the body.
+const MIN_LINES: usize = 2;
+/// whether to trim the space in the value.
+const TRIM_SPACE: bool = true;
+/// the base line number. It starts from 0.
+const HEADER: usize = 0;
+
+/// GuessWidth reads records from printf-like output.
+pub struct GuessWidth {
+    pub(crate) reader: io::BufReader<Box<dyn io::Read>>,
+    // a list of separator positions, measured in display cells
+    // (a double-width character occupies two cells).
+    pub(crate) pos: Vec<usize>,
+    // stores the lines read for scan.
+    pub(crate) pre_lines: Vec<String>,
+    // the number of pre-read lines already returned by read.
+    pub(crate) pre_count: usize,
+    // the maximum number of separators to keep; 0 means no limit.
+    pub(crate) limit_split: usize,
+}
+
+impl GuessWidth {
+    /// new_reader wraps `r` in a buffered reader.
+    /// Nothing is read until `read_all` triggers the first scan.
+    pub fn new_reader(r: Box<dyn io::Read>) -> GuessWidth {
+        let reader = io::BufReader::new(r);
+        GuessWidth {
+            reader,
+            pos: Vec::new(),
+            pre_lines: Vec::new(),
+            pre_count: 0,
+            limit_split: 0,
+        }
+    }
+
+    /// read_all reads all rows
+    /// and returns a two-dimensional vector of rows and columns.
+    pub fn read_all(&mut self) -> Vec<Vec<String>> {
+        if self.pre_lines.is_empty() {
+            self.scan(SCAN_NUM);
+        }
+
+        let mut rows = Vec::new();
+        // `read` reports the end of input as an error, so any `Err` ends the loop.
+        while let Ok(columns) = self.read() {
+            rows.push(columns);
+        }
+        rows
+    }
+
+    /// scan pre-reads up to `num` lines and derives the separator positions
+    /// from them, keeping at most `limit_split` separators when it is non-zero.
+    fn scan(&mut self, num: u8) {
+        for _ in 0..num {
+            let mut buf = String::new();
+            // best effort: a read error is handled like the end of input.
+            if self.reader.read_line(&mut buf).unwrap_or(0) == 0 {
+                break;
+            }
+
+            let line = buf.trim_end().to_string();
+            self.pre_lines.push(line);
+        }
+
+        self.pos = positions(&self.pre_lines, HEADER, MIN_LINES);
+        if self.limit_split > 0 && self.pos.len() > self.limit_split {
+            self.pos.truncate(self.limit_split);
+        }
+    }
+
+    /// read reads one row and returns a vector of columns.
+    /// scan is executed first if nothing has been pre-read yet.
+    /// The pre-read lines are returned first, then the rest of the reader;
+    /// the end of input is reported as `io::ErrorKind::UnexpectedEof`.
+    fn read(&mut self) -> Result<Vec<String>, io::Error> {
+        if self.pre_lines.is_empty() {
+            self.scan(SCAN_NUM);
+        }
+
+        if self.pre_count < self.pre_lines.len() {
+            let line = &self.pre_lines[self.pre_count];
+            self.pre_count += 1;
+            Ok(split(line, &self.pos, TRIM_SPACE))
+        } else {
+            let mut buf = String::new();
+            if self.reader.read_line(&mut buf)? == 0 {
+                return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "End of file"));
+            }
+
+            let line = buf.trim_end().to_string();
+            Ok(split(&line, &self.pos, TRIM_SPACE))
+        }
+    }
+}
+
+// positions returns separator positions
+// from multiple lines and header line number.
+// Lines before the header line are ignored.
+fn positions(lines: &[String], header: usize, min_lines: usize) -> Vec<usize> {
+    let mut blanks = Vec::new();
+    for (n, line) in lines.iter().enumerate() {
+        if n < header {
+            continue;
+        }
+
+        if n == header {
+            blanks = lookup_blanks(line.trim_end_matches(' '));
+            continue;
+        }
+
+        count_blanks(&mut blanks, line.trim_end_matches(' '));
+    }
+
+    positions_helper(&blanks, min_lines)
+}
+
+// separator_position returns the char index at which to cut `lr` for
+// separator `n`, given that the scan reached char index `p`.
+// If `lr[p]` is whitespace the cut is at `p`; otherwise the cut moves to the
+// whitespace after (`f`) or before (`b`) the word that overflows the column.
+// The caller must guarantee `p < lr.len()` and `n < pos.len()`.
+// NOTE(review): `b` and `f` are char indices while `pos` holds display cells;
+// the comparisons below only line up when no wide character precedes them.
+fn separator_position(lr: &[char], p: usize, pos: &[usize], n: usize) -> usize {
+    if lr[p].is_whitespace() {
+        return p;
+    }
+
+    // first whitespace at or after p (or the end of the line).
+    let mut f = p;
+    while f < lr.len() && !lr[f].is_whitespace() {
+        f += 1;
+    }
+
+    // last whitespace at or before p (or the start of the line).
+    let mut b = p;
+    while b > 0 && !lr[b].is_whitespace() {
+        b -= 1;
+    }
+
+    if b == pos[n] {
+        return f;
+    }
+
+    // `n + 1 < pos.len()` cannot underflow, unlike `n < pos.len() - 1`.
+    if n + 1 < pos.len() {
+        if f == pos[n + 1] {
+            return b;
+        }
+        if b > pos[n] && b < pos[n + 1] {
+            return b;
+        }
+    }
+
+    f
+}
+
+// split cuts `line` into columns at the separator positions `pos`
+// (display cells), trimming each column when `trim_space` is set.
+// It always returns at least one column: the remainder of the line.
+fn split(line: &str, pos: &[usize], trim_space: bool) -> Vec<String> {
+    let mut n = 0;
+    let mut start = 0;
+    let mut columns = Vec::with_capacity(pos.len() + 1);
+    // `start`/`end` are char indices, but a `&str` can only be sliced by byte
+    // offset. Keep the byte offset of every char so that multi-byte input is
+    // cut at the right place instead of panicking on a char boundary.
+    let (offsets, lr): (Vec<usize>, Vec<char>) = line.char_indices().unzip();
+    let byte_at = |idx: usize| offsets.get(idx).copied().unwrap_or(line.len());
+    let mut w = 0;
+
+    for p in 0..lr.len() {
+        if n >= pos.len() {
+            // all separators are used up. Never move `start` backwards: the
+            // last cut may lie beyond `p` when a value overflowed its column,
+            // and restarting at `p` would repeat the tail of that value.
+            start = start.max(p);
+            break;
+        }
+
+        if pos[n] <= w {
+            let end = separator_position(&lr, p, pos, n);
+            if start > end {
+                break;
+            }
+            let col = &line[byte_at(start)..byte_at(end)];
+            let col = if trim_space { col.trim() } else { col };
+            columns.push(col.to_string());
+            n += 1;
+            start = end;
+        }
+
+        w += UnicodeWidthStr::width(lr[p].to_string().as_str());
+    }
+
+    // add last part.
+    let col = &line[byte_at(start)..];
+    let col = if trim_space { col.trim() } else { col };
+    columns.push(col.to_string());
+    columns
+}
+
+// Creates a blank(1) and non-blank(0) vector with one entry per display cell.
+// Leading spaces are recorded as non-blank(0) so they never become separators.
+// Execute for the base line (header line).
+fn lookup_blanks(line: &str) -> Vec<usize> {
+    let mut blanks = Vec::new();
+    let mut first = true;
+
+    for c in line.chars() {
+        if c == ' ' {
+            if first {
+                blanks.push(0);
+                continue;
+            }
+            blanks.push(1);
+            continue;
+        }
+
+        first = false;
+        blanks.push(0);
+        // a double-width character occupies a second cell.
+        if UnicodeWidthStr::width(c.to_string().as_str()) == 2 {
+            blanks.push(0);
+        }
+    }
+
+    blanks
+}
+
+// count up if the line is blank where the reference line was blank.
+fn count_blanks(blanks: &mut [usize], line: &str) {
+    let mut n = 0;
+
+    for c in line.chars() {
+        if n >= blanks.len() {
+            break;
+        }
+
+        if c == ' ' && blanks[n] > 0 {
+            blanks[n] += 1;
+        }
+
+        n += 1;
+        // a double-width character occupies a second cell.
+        if UnicodeWidthStr::width(c.to_string().as_str()) == 2 {
+            n += 1;
+        }
+    }
+}
+
+// Generates a list of separator positions from a blank vector.
+// Within each run of non-zero cells, the last cell holding the highest count
+// (at least `min_lines`) becomes the separator; a zero cell closes the run.
+fn positions_helper(blanks: &[usize], min_lines: usize) -> Vec<usize> {
+    let mut max = min_lines;
+    let mut p = 0;
+    let mut pos = Vec::new();
+
+    for (n, v) in blanks.iter().enumerate() {
+        if *v >= max {
+            max = *v;
+            p = n;
+        }
+        if *v == 0 {
+            max = min_lines;
+            if p > 0 {
+                pos.push(p);
+                p = 0;
+            }
+        }
+    }
+    pos
+}
+
+// to_rows returns rows separated by columns.
+#[allow(dead_code)]
+fn to_rows(lines: Vec<String>, pos: Vec<usize>, trim_space: bool) -> Vec<Vec<String>> {
+    lines
+        .iter()
+        .map(|line| split(line, &pos, trim_space))
+        .collect()
+}
+
+// to_table parses a slice of lines and returns a table.
+#[allow(dead_code)] +pub fn to_table(lines: Vec, header: usize, trim_space: bool) -> Vec> { + let pos = positions(&lines, header, 2); + to_rows(lines, pos, trim_space) +} + +// to_table_n parses a slice of lines and returns a table, but limits the number of splits. +#[allow(dead_code)] +pub fn to_table_n( + lines: Vec, + header: usize, + num_split: usize, + trim_space: bool, +) -> Vec> { + let mut pos = positions(&lines, header, 2); + if pos.len() > num_split { + pos.truncate(num_split); + } + to_rows(lines, pos, trim_space) +} + +#[cfg(test)] +mod tests { + use super::{to_table, to_table_n, GuessWidth}; + + #[test] + fn test_guess_width_ps_trim() { + let input = " PID TTY TIME CMD +302965 pts/3 00:00:11 zsh +709737 pts/3 00:00:00 ps"; + + let r = Box::new(std::io::BufReader::new(input.as_bytes())) as Box; + let reader = std::io::BufReader::new(r); + + let mut guess_width = GuessWidth { + reader, + pos: Vec::new(), + pre_lines: Vec::new(), + pre_count: 0, + limit_split: 0, + }; + + #[rustfmt::skip] + let want = vec![ + vec!["PID", "TTY", "TIME", "CMD"], + vec!["302965", "pts/3", "00:00:11", "zsh"], + vec!["709737", "pts/3", "00:00:00", "ps"], + ]; + let got = guess_width.read_all(); + assert_eq!(got, want); + } + + #[test] + fn test_guess_width_ps_overflow_trim() { + let input = "USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND +root 1 0.0 0.0 168576 13788 ? Ss Mar11 0:49 /sbin/init splash +noborus 703052 2.1 0.7 1184814400 230920 ? 
Sl 10:03 0:45 /opt/google/chrome/chrome +noborus 721971 0.0 0.0 13716 3524 pts/3 R+ 10:39 0:00 ps aux"; + + let r = Box::new(std::io::BufReader::new(input.as_bytes())) as Box; + let reader = std::io::BufReader::new(r); + + let mut guess_width = GuessWidth { + reader, + pos: Vec::new(), + pre_lines: Vec::new(), + pre_count: 0, + limit_split: 0, + }; + + #[rustfmt::skip] + let want = vec![ + vec!["USER", "PID", "%CPU", "%MEM", "VSZ", "RSS", "TTY", "STAT", "START", "TIME", "COMMAND"], + vec!["root", "1", "0.0", "0.0", "168576", "13788", "?", "Ss", "Mar11", "0:49", "/sbin/init splash"], + vec!["noborus", "703052", "2.1", "0.7", "1184814400", "230920", "?", "Sl", "10:03", "0:45", "/opt/google/chrome/chrome"], + vec!["noborus", "721971", "0.0", "0.0", "13716", "3524", "pts/3", "R+", "10:39", "0:00", "ps aux"], + ]; + let got = guess_width.read_all(); + assert_eq!(got, want); + } + + #[test] + fn test_guess_width_ps_limit_trim() { + let input = " PID TTY TIME CMD +302965 pts/3 00:00:11 zsh +709737 pts/3 00:00:00 ps"; + + let r = Box::new(std::io::BufReader::new(input.as_bytes())) as Box; + let reader = std::io::BufReader::new(r); + + let mut guess_width = GuessWidth { + reader, + pos: Vec::new(), + pre_lines: Vec::new(), + pre_count: 0, + limit_split: 2, + }; + + #[rustfmt::skip] + let want = vec![ + vec!["PID", "TTY", "TIME CMD"], + vec!["302965", "pts/3", "00:00:11 zsh"], + vec!["709737", "pts/3", "00:00:00 ps"], + ]; + let got = guess_width.read_all(); + assert_eq!(got, want); + } + + #[test] + fn test_guess_width_windows_df_trim() { + let input = "Filesystem 1K-blocks Used Available Use% Mounted on +C:/Apps/Git 998797308 869007000 129790308 88% / +D: 104792064 17042676 87749388 17% /d"; + + let r = Box::new(std::io::BufReader::new(input.as_bytes())) as Box; + let reader = std::io::BufReader::new(r); + + let mut guess_width = GuessWidth { + reader, + pos: Vec::new(), + pre_lines: Vec::new(), + pre_count: 0, + limit_split: 0, + }; + + #[rustfmt::skip] + let want = vec![ 
+ vec!["Filesystem","1K-blocks","Used","Available","Use%","Mounted on"], + vec!["C:/Apps/Git","998797308","869007000","129790308","88%","/"], + vec!["D:","104792064","17042676","87749388","17%","/d"], + ]; + let got = guess_width.read_all(); + assert_eq!(got, want); + } + + #[test] + fn test_to_table() { + let lines = vec![ + " PID TTY TIME CMD".to_string(), + "302965 pts/3 00:00:11 zsh".to_string(), + "709737 pts/3 00:00:00 ps".to_string(), + ]; + + let want = vec![ + vec!["PID", "TTY", "TIME", "CMD"], + vec!["302965", "pts/3", "00:00:11", "zsh"], + vec!["709737", "pts/3", "00:00:00", "ps"], + ]; + + let header = 0; + let trim_space = true; + let table = to_table(lines, header, trim_space); + assert_eq!(table, want); + } + + #[test] + fn test_to_table_n() { + let lines = vec![ + "2022-12-21T09:50:16+0000 WARN A warning that should be ignored is usually at this level and should be actionable.".to_string(), + "2022-12-21T09:50:17+0000 INFO This is less important than debug log and is often used to provide context in the current task.".to_string(), + ]; + + let want = vec![ + vec!["2022-12-21T09:50:16+0000", "WARN", "A warning that should be ignored is usually at this level and should be actionable."], + vec!["2022-12-21T09:50:17+0000", "INFO", "This is less important than debug log and is often used to provide context in the current task."], + ]; + + let header = 0; + let trim_space = true; + let num_split = 2; + let table = to_table_n(lines, header, num_split, trim_space); + assert_eq!(table, want); + } +} diff --git a/crates/nu-command/src/strings/mod.rs b/crates/nu-command/src/strings/mod.rs index 4f9f35878b..d99d720b54 100644 --- a/crates/nu-command/src/strings/mod.rs +++ b/crates/nu-command/src/strings/mod.rs @@ -2,6 +2,7 @@ mod char_; mod detect_columns; mod encode_decode; mod format; +mod guess_width; mod parse; mod split; mod str_; diff --git a/crates/nu-command/tests/commands/detect_columns.rs b/crates/nu-command/tests/commands/detect_columns.rs index 
662f770130..bc56a70dd8 100644 --- a/crates/nu-command/tests/commands/detect_columns.rs +++ b/crates/nu-command/tests/commands/detect_columns.rs @@ -1,7 +1,7 @@ -use nu_test_support::{nu, playground::Playground}; +use nu_test_support::{nu, pipeline, playground::Playground}; #[test] -fn detect_columns() { +fn detect_columns_with_legacy() { let cases = [( "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"", "[[c1,c2,c3,c4,c5]; [a,b,c,d,e]]", @@ -11,14 +11,14 @@ fn detect_columns() { for case in cases.into_iter() { let out = nu!( cwd: dirs.test(), - "({} | detect columns) == {}", + "({} | detect columns --legacy) == {}", case.0, case.1 ); assert_eq!( out.out, "true", - "({} | detect columns) == {}", + "({} | detect columns --legacy) == {}", case.0, case.1 ); } @@ -26,7 +26,7 @@ fn detect_columns() { } #[test] -fn detect_columns_with_flag_c() { +fn detect_columns_with_legacy_and_flag_c() { let cases = [ ( "$\"c1 c2 c3 c4 c5(char nl)a b c d e\"", @@ -49,7 +49,7 @@ fn detect_columns_with_flag_c() { for case in cases.into_iter() { let out = nu!( cwd: dirs.test(), - "({} | detect columns --combine-columns {}) == {}", + "({} | detect columns --legacy --combine-columns {}) == {}", case.0, case.2, case.1, @@ -57,9 +57,40 @@ fn detect_columns_with_flag_c() { assert_eq!( out.out, "true", - "({} | detect columns --combine-columns {}) == {}", + "({} | detect columns --legacy --combine-columns {}) == {}", case.0, case.2, case.1 ); } }); } + +#[test] +fn detect_columns_with_flag_c() { + let body = "$\" +total 284K(char nl) +drwxr-xr-x 2 root root 4.0K Mar 20 08:28 =(char nl) +drwxr-xr-x 4 root root 4.0K Mar 20 08:18 ~(char nl) +-rw-r--r-- 1 root root 3.0K Mar 20 07:23 ~asdf(char nl)\""; + let expected = "[ +['column0', 'column1', 'column2', 'column3', 'column4', 'column5', 'column8']; +['drwxr-xr-x', '2', 'root', 'root', '4.0K', 'Mar 20 08:28', '='], +['drwxr-xr-x', '4', 'root', 'root', '4.0K', 'Mar 20 08:18', '~'], +['-rw-r--r--', '1', 'root', 'root', '3.0K', 'Mar 20 07:23', '~asdf'] +]"; + 
let range = "5..7"; + let cmd = format!( + "({} | detect columns -c {} -s 1 --no-headers) == {}", + pipeline(body), + range, + pipeline(expected), + ); + println!("debug cmd: {cmd}"); + Playground::setup("detect_columns_test_1", |dirs, _| { + let out = nu!( + cwd: dirs.test(), + cmd, + ); + println!("{}", out.out); + assert_eq!(out.out, "true"); + }) +}