From a0ed6ea3c8f1c774163886eb4ebbbe324aeca527 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Thu, 17 Oct 2019 00:17:58 +0200 Subject: [PATCH 1/4] Adds new tests and updates old ones. New tests are added to test for additional cases that might be trickier to handle with the new logic. Old tests are updated where their expectations are no longer expected to hold true. For instance: previously, lines would be treated separately, allowing any index offset between columns on different rows, as long as they had the same row index as decided by a separator. When this is no longer the case, some things need to be adjusted. --- src/commands/from_ssv.rs | 75 +++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 9 deletions(-) diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 7aca350964..39ad1f7c73 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -171,9 +171,9 @@ mod tests { a b - 1 2 + 1 2 - 3 4 + 3 4 "#; let result = string_to_table(input, false, 1); assert_eq!( @@ -185,6 +185,20 @@ mod tests { ); } + #[test] + fn it_deals_with_single_column_input() { + let input = r#" + a + 1 + 2 + "#; + let result = string_to_table(input, false, 1); + assert_eq!( + result, + Some(vec![vec![owned("a", "1")], vec![owned("a", "2")]]) + ); + } + #[test] fn it_ignores_headers_when_headerless() { let input = r#" @@ -206,15 +220,15 @@ mod tests { fn it_returns_none_given_an_empty_string() { let input = ""; let result = string_to_table(input, true, 1); - assert_eq!(result, None); + assert!(result.is_none()); } #[test] fn it_allows_a_predefined_number_of_spaces() { let input = r#" column a column b - entry 1 entry number 2 - 3 four + entry 1 entry number 2 + 3 four "#; let result = string_to_table(input, false, 3); @@ -239,12 +253,55 @@ mod tests { let trimmed = |s: &str| s.trim() == s; + let result = string_to_table(input, false, 2).unwrap(); + assert!(result + .iter() + .all(|row| row.iter().all(|(a, b)| trimmed(a) && trimmed(b)))) + } + + #[test] + fn it_keeps_empty_columns() { + let input = r#" + colA col B col C + val2 val3 + val4 val 5 val 6 + val7 val8 + "#; + let result = string_to_table(input, false, 2).unwrap(); assert_eq!( - true, - result - .iter() - .all(|row| row.iter().all(|(a, b)| trimmed(a) && trimmed(b))) + result, + vec![ + vec![ + owned("colA", ""), + owned("col B", "val2"), + owned("col C", "val3") + ], + vec![ + owned("colA", "val4"), + owned("col B", "val 5"), + owned("col C", "val 6") + ], + vec![ + owned("colA", "val7"), + owned("col B", ""), + owned("col C", "val8") + ], + ] + ) + } + + #[test] + fn it_drops_trailing_values() { + let input = r#" + colA col B + val1 val2 trailing value that should be ignored + "#; + + let result = string_to_table(input, false, 2).unwrap(); + assert_eq!( + result, + vec![vec![owned("colA", "val1"), owned("col B", "val2"),],] ) } } From 9b1ff9b5667f864212efa69e51716fbd47cdd157 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Thu, 17 Oct 2019 00:20:48 +0200 Subject: [PATCH 2/4] Updates the table creation logic. The table parsing/creation logic has changed from treating every line the same to processing each line in context of the column header's placement. Previously, lines on separate rows would go towards the same column as long as they were the same index based on separator alone. Now, each item's index is based on vertical alignment to the column header. This may seem brittle, but it solves the problem of some tables operating with empty cells that would cause remaining values to be paired with the wrong column. Based on kubernetes output (get pods, events), the new method has shown to have much greater success rates for parsing. --- src/commands/from_ssv.rs | 44 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 39ad1f7c73..3af9e76084 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -46,33 +46,39 @@ fn string_to_table( let mut lines = s.lines().filter(|l| !l.trim().is_empty()); let separator = " ".repeat(std::cmp::max(split_at, 1)); - let headers = lines - .next()? - .split(&separator) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()) - .map(|s| s.to_owned()) - .collect::>(); + let headers_raw = lines.next()?; - let header_row = if headerless { - (1..=headers.len()) - .map(|i| format!("Column{}", i)) - .collect::>() - } else { + let headers = headers_raw + .trim() + .split(&separator) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(|s| (headers_raw.find(s).unwrap(), s.to_owned())); + + let columns = if headerless { headers + .enumerate() + .map(|(header_no, (string_index, _))| { + (string_index, format!("Column{}", header_no + 1)) + }) + .collect::>() + } else { + headers.collect::>() }; Some( lines .map(|l| { - header_row + columns .iter() - .zip( - l.split(&separator) - .map(|s| s.trim()) - .filter(|s| !s.is_empty()), - ) - .map(|(a, b)| (String::from(a), String::from(b))) + .enumerate() + .filter_map(|(i, (start, col))| { + (match columns.get(i + 1) { + Some((end, _)) => l.get(*start..*end), + None => l.get(*start..)?.split(&separator).next(), + }) + .and_then(|s| Some((col.clone(), String::from(s.trim())))) + }) .collect() }) .collect(), From 305ca11eb57801ec491984f336294c3331e04903 Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Thu, 17 Oct 2019 09:40:00 +0200 Subject: [PATCH 3/4] Changes the parsing to use the full value of the final column. Previously it would split the last column on the first separator value found between the start of the column and the end of the row. Changing this to using everything from the start of the column to the end of the string makes it behave more similarly to the other columns, making it less surprising. --- src/commands/from_ssv.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 3af9e76084..913df9981a 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -75,7 +75,7 @@ fn string_to_table( .filter_map(|(i, (start, col))| { (match columns.get(i + 1) { Some((end, _)) => l.get(*start..*end), - None => l.get(*start..)?.split(&separator).next(), + None => l.get(*start..), }) .and_then(|s| Some((col.clone(), String::from(s.trim())))) }) @@ -298,16 +298,16 @@ mod tests { } #[test] - fn it_drops_trailing_values() { + fn it_uses_the_full_final_column() { let input = r#" colA col B - val1 val2 trailing value that should be ignored + val1 val2 trailing value that should be included "#; let result = string_to_table(input, false, 2).unwrap(); assert_eq!( result, - vec![vec![owned("colA", "val1"), owned("col B", "val2"),],] + vec![vec![owned("colA", "val1"), owned("col B", "val2 trailing value that should be included"),],] ) } } From f21405399cb60df969158abe7a40abc2aee780cd Mon Sep 17 00:00:00 2001 From: Thomas Hartmann Date: Thu, 17 Oct 2019 09:56:06 +0200 Subject: [PATCH 4/4] Formats file. --- src/commands/from_ssv.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/commands/from_ssv.rs b/src/commands/from_ssv.rs index 913df9981a..f14d89356a 100644 --- a/src/commands/from_ssv.rs +++ b/src/commands/from_ssv.rs @@ -307,7 +307,10 @@ mod tests { let result = string_to_table(input, false, 2).unwrap(); assert_eq!( result, - vec![vec![owned("colA", "val1"), owned("col B", "val2 trailing value that should be included"),],] + vec![vec![ + owned("colA", "val1"), + owned("col B", "val2 trailing value that should be included"), + ],] ) } }