From 0a8c9b22b0c3043bd5548fa0577db1c377b8d1f7 Mon Sep 17 00:00:00 2001 From: Bob Hyman Date: Mon, 20 Feb 2023 07:32:20 -0500 Subject: [PATCH] `string | fill` counts clusters, not graphemes; and doesn't count ANSI escape codes (#8134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhancement of new `fill` command (#7846) to handle content including ANSI escape codes for formatting or multi-code-point Unicode grapheme clusters. In both of these cases, the content is (many) bytes longer than its visible length, and `fill` was counting the extra bytes so not adding enough fill characters. # Description This script: ```rust # the teacher emoji `\u{1F9D1}\u{200D}\u{1F3EB}` is 3 code points, but only 1 print position wide. echo "This output should be 3 print positions wide, with leading and trailing `+`" $"\u{1F9D1}\u{200D}\u{1F3EB}" | fill -c "+" -w 3 -a "c" echo "This output should be 3 print positions wide, with leading and trailing `+`" $"(ansi green)a(ansi reset)" | fill -c "+" -w 3 -a c echo "" ``` Was producing this output: ```rust This output should be 3 print positions wide, with leading and trailing `+` 🧑‍🏫 This output should be 3 print positions wide, with leading and trailing `+` a ``` After this PR, it produces this output: ```rust This output should be 3 print positions wide, with leading and trailing `+` +🧑‍🏫+ This output should be 3 print positions wide, with leading and trailing `+` +a+ ``` # User-Facing Changes Users may have to undo fixes they may have introduced to work around the former behavior. I have one such in my prompt string that I can now revert. # Tests + Formatting Don't forget to add tests that cover your changes. -- Done Make sure you've run and fixed any issues with these commands: - [x] `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style - [x] `cargo test --workspace` to check that all tests pass # After Submitting `fill` command not documented in the book, and it still talks about `str lpad/rpad`. I'll fix. Note added dependency on a new library `print-positions`, which is an iterator that yields a complete print position (cluster + Ansi sequence) per call. Should this be vendored? --- Cargo.lock | 14 ++++++++-- Cargo.toml | 7 ++++- crates/nu-command/Cargo.toml | 1 + crates/nu-command/src/conversions/fill.rs | 5 ++-- crates/nu-command/tests/commands/fill.rs | 33 +++++++++++++++++++++++ crates/nu-command/tests/commands/mod.rs | 1 + 6 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 crates/nu-command/tests/commands/fill.rs diff --git a/Cargo.lock b/Cargo.lock index d5ca8dccb3..42479488ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2777,6 +2777,7 @@ dependencies = [ "percent-encoding", "polars", "powierza-coefficient", + "print-positions", "proptest", "quick-xml 0.27.1", "quickcheck", @@ -3894,6 +3895,15 @@ dependencies = [ "yansi", ] +[[package]] +name = "print-positions" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df593470e3ef502e48cb0cfc9a3a61e5f61e967b78e1ed35a67ac615cfbd208" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -5486,9 +5496,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "unicode-width" diff --git a/Cargo.toml b/Cargo.toml index 7703cce922..5a0e52e6cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,7 +77,12 @@ signal-hook = { version = "0.3.14", default-features = false } winres = "0.1" [target.'cfg(target_family = "unix")'.dependencies] -nix = { version = "0.25", default-features = false, features = ["signal", "process", "fs", "term"] } +nix = { version = "0.25", default-features = false, features = [ + "signal", + "process", + "fs", + "term", +] } atty = "0.2" [dev-dependencies] diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index c38bd1ac87..950957393f 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -98,6 +98,7 @@ url = "2.2.1" uuid = { version = "1.2.2", features = ["v4"] } wax = { version = "0.5.0" } which = { version = "4.4.0", optional = true } +print-positions = "0.6.1" [target.'cfg(windows)'.dependencies] winreg = "0.11.0" diff --git a/crates/nu-command/src/conversions/fill.rs b/crates/nu-command/src/conversions/fill.rs index 6ea2af5cc6..adaba33f69 100644 --- a/crates/nu-command/src/conversions/fill.rs +++ b/crates/nu-command/src/conversions/fill.rs @@ -5,7 +5,7 @@ use nu_protocol::{ engine::{Command, EngineState, Stack}, Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, }; -use unicode_width::UnicodeWidthStr; +use print_positions::print_positions; #[derive(Clone)] pub struct Fill; @@ -225,7 +225,8 @@ fn fill_string(s: &str, args: &Arguments, span: Span) -> Value { fn pad(s: &str, width: usize, pad_char: &str, alignment: FillAlignment, truncate: bool) -> String { // Attribution: Most of this function was taken from https://github.com/ogham/rust-pad and tweaked. Thank you! // Use width instead of len for graphical display - let cols = UnicodeWidthStr::width(s); + + let cols = print_positions(s).count(); if cols >= width { if truncate { diff --git a/crates/nu-command/tests/commands/fill.rs b/crates/nu-command/tests/commands/fill.rs new file mode 100644 index 0000000000..972bb562cf --- /dev/null +++ b/crates/nu-command/tests/commands/fill.rs @@ -0,0 +1,33 @@ +use nu_test_support::{nu, pipeline}; + +#[test] +fn string_fill_plain() { + let actual = nu!( + cwd: ".", + pipeline( + r#" + "abc" | fill --alignment center --character "+" --width 5 + "# + ) + ); + + assert_eq!(actual.out, "+abc+"); +} + +#[test] +fn string_fill_fancy() { + let actual = nu!( + cwd: ".", + pipeline( + r#" + $"(ansi red)a(ansi green)\u{65}\u{308}(ansi cyan)c(ansi reset)" + | fill --alignment center --character "+" --width 5 + "# + ) + ); + + assert_eq!( + actual.out, + "+\u{1b}[31ma\u{1b}[32me\u{308}\u{1b}[36mc\u{1b}[0m+" + ); +} diff --git a/crates/nu-command/tests/commands/mod.rs b/crates/nu-command/tests/commands/mod.rs index 232af3ff20..3e95115df7 100644 --- a/crates/nu-command/tests/commands/mod.rs +++ b/crates/nu-command/tests/commands/mod.rs @@ -23,6 +23,7 @@ mod every; #[cfg(not(windows))] mod exec; mod export_def; +mod fill; mod find; mod first; mod flatten;