From 6891267b5347f42f1fc53d1f737928ade2449660 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 16 May 2024 23:59:08 +0000 Subject: [PATCH] Support `ByteStream`s in `bytes starts-with` and `bytes ends-with` (#12887) # Description Restores `bytes starts-with` so that it is able to work with byte streams once again. For parity/consistency, this PR also adds byte stream support to `bytes ends-with`. # User-Facing Changes - `bytes ends-with` now supports byte streams. # Tests + Formatting Re-enabled tests for `bytes starts-with` and added tests for `bytes ends-with`. --- .../tests/commands/bytes/ends_with.rs | 120 +++++++++++++ .../nu-cmd-extra/tests/commands/bytes/mod.rs | 1 + .../tests/commands/bytes/starts_with.rs | 160 +++++++++--------- crates/nu-command/src/bytes/ends_with.rs | 54 +++++- crates/nu-command/src/bytes/starts_with.rs | 36 ++-- 5 files changed, 275 insertions(+), 96 deletions(-) create mode 100644 crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs diff --git a/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs new file mode 100644 index 0000000000..b90f936b96 --- /dev/null +++ b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs @@ -0,0 +1,120 @@ +use nu_test_support::nu; + +#[test] +fn basic_binary_end_with() { + let actual = nu!(r#" + "hello world" | into binary | bytes ends-with 0x[77 6f 72 6c 64] + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn basic_string_fails() { + let actual = nu!(r#" + "hello world" | bytes ends-with 0x[77 6f 72 6c 64] + "#); + + assert!(actual.err.contains("command doesn't support")); + assert_eq!(actual.out, ""); +} + +#[test] +fn short_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101] + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn short_stream_mismatch() { + let actual = nu!(r#" + nu --testbin repeater (0x[010203]) 5 | bytes ends-with 0x[010204] + "#); + + 
assert_eq!(actual.out, "false"); +} + +#[test] +fn short_stream_binary_overflow() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101010101] + "#); + + assert_eq!(actual.out, "false"); +} + +#[test] +fn long_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes ends-with 0x[010101] + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_binary_overflow() { + // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes ends-with (0..32768 | each {|| 0x[01] } | bytes collect) + "#); + + assert_eq!(actual.out, "false"); +} + +#[test] +fn long_stream_binary_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01020304]) 8192 | bytes ends-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_string_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater hell 8192 | bytes ends-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_mixed_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) + + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build $binseg $strseg) + "#); + + assert_eq!( + actual.err, "", + "invocation failed. 
command line limit likely reached" + ); + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_mixed_overflow() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) + + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build 0x[01] $binseg $strseg) + "#); + + assert_eq!( + actual.err, "", + "invocation failed. command line limit likely reached" + ); + assert_eq!(actual.out, "false"); +} diff --git a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs index b5517bdacb..a8a241eec0 100644 --- a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs +++ b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs @@ -1 +1,2 @@ +mod ends_with; mod starts_with; diff --git a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs index c3ad1ec448..e7d57698b5 100644 --- a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs +++ b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs @@ -19,102 +19,102 @@ fn basic_string_fails() { assert_eq!(actual.out, ""); } -// #[test] -// fn short_stream_binary() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101] -// "#); +#[test] +fn short_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101] + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn short_stream_mismatch() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204] -// "#); +#[test] +fn short_stream_mismatch() { + let actual = nu!(r#" + nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204] + "#); -// assert_eq!(actual.out, "false"); -// } + 
assert_eq!(actual.out, "false"); +} -// #[test] -// fn short_stream_binary_overflow() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101] -// "#); +#[test] +fn short_stream_binary_overflow() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101] + "#); -// assert_eq!(actual.out, "false"); -// } + assert_eq!(actual.out, "false"); +} -// #[test] -// fn long_stream_binary() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101] -// "#); +#[test] +fn long_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101] + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_binary_overflow() { -// // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect) -// "#); +#[test] +fn long_stream_binary_overflow() { + // .. 
ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect) + "#); -// assert_eq!(actual.out, "false"); -// } + assert_eq!(actual.out, "false"); +} -// #[test] -// fn long_stream_binary_exact() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) -// "#); +#[test] +fn long_stream_binary_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_string_exact() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) -// "#); +#[test] +fn long_stream_string_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_mixed_exact() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) -// let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) +#[test] +fn long_stream_mixed_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check 
for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) -// nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg) -// "#); + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg) + "#); -// assert_eq!( -// actual.err, "", -// "invocation failed. command line limit likely reached" -// ); -// assert_eq!(actual.out, "true"); -// } + assert_eq!( + actual.err, "", + "invocation failed. command line limit likely reached" + ); + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_mixed_overflow() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) -// let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) +#[test] +fn long_stream_mixed_overflow() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) -// nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01]) -// "#); + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01]) + "#); -// assert_eq!( -// actual.err, "", -// "invocation failed. command line limit likely reached" -// ); -// assert_eq!(actual.out, "false"); -// } + assert_eq!( + actual.err, "", + "invocation failed. 
command line limit likely reached" + ); + assert_eq!(actual.out, "false"); +} diff --git a/crates/nu-command/src/bytes/ends_with.rs b/crates/nu-command/src/bytes/ends_with.rs index ef0389db0c..d6174a189c 100644 --- a/crates/nu-command/src/bytes/ends_with.rs +++ b/crates/nu-command/src/bytes/ends_with.rs @@ -1,5 +1,9 @@ use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; +use std::{ + collections::VecDeque, + io::{self, BufRead}, +}; struct Arguments { pattern: Vec<u8>, @@ -52,14 +56,54 @@ impl Command for BytesEndsWith { call: &Call, input: PipelineData, ) -> Result<PipelineData, ShellError> { + let head = call.head; let pattern: Vec<u8> = call.req(engine_state, stack, 0)?; let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let arg = Arguments { - pattern, - cell_paths, - }; - operate(ends_with, arg, input, call.head, engine_state.ctrlc.clone()) + + if let PipelineData::ByteStream(stream, ..) = input { + let span = stream.span(); + if pattern.is_empty() { + return Ok(Value::bool(true, head).into_pipeline_data()); + } + let Some(mut reader) = stream.reader() else { + return Ok(Value::bool(false, head).into_pipeline_data()); + }; + let cap = pattern.len(); + let mut end = VecDeque::<u8>::with_capacity(cap); + loop { + let buf = match reader.fill_buf() { + Ok(&[]) => break, + Ok(buf) => buf, + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e.into_spanned(span).into()), + }; + let len = buf.len(); + if len >= cap { + end.clear(); + end.extend(&buf[(len - cap)..]) + } else { + let new_len = len + end.len(); + if new_len > cap { + // The `drain` below will panic if `(new_len - cap) > end.len()`.
+ // But this cannot happen since we know `len < cap` (as checked above): + // (len + end.len() - cap) > end.len() + // => (len - cap) > 0 + // => len > cap + end.drain(..(new_len - cap)); + } + end.extend(buf); + } + reader.consume(len); + } + Ok(Value::bool(end == pattern, head).into_pipeline_data()) + } else { + let arg = Arguments { + pattern, + cell_paths, + }; + operate(ends_with, arg, input, head, engine_state.ctrlc.clone()) + } } fn examples(&self) -> Vec<Example> { diff --git a/crates/nu-command/src/bytes/starts_with.rs b/crates/nu-command/src/bytes/starts_with.rs index 2d7ca3e26a..92cc16f02c 100644 --- a/crates/nu-command/src/bytes/starts_with.rs +++ b/crates/nu-command/src/bytes/starts_with.rs @@ -1,5 +1,6 @@ use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; +use std::io::Read; struct Arguments { pattern: Vec<u8>, @@ -53,20 +54,33 @@ impl Command for BytesStartsWith { call: &Call, input: PipelineData, ) -> Result<PipelineData, ShellError> { + let head = call.head; let pattern: Vec<u8> = call.req(engine_state, stack, 0)?; let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let arg = Arguments { - pattern, - cell_paths, - }; - operate( - starts_with, - arg, - input, - call.head, - engine_state.ctrlc.clone(), - ) + + if let PipelineData::ByteStream(stream, ..) = input { + let span = stream.span(); + if pattern.is_empty() { + return Ok(Value::bool(true, head).into_pipeline_data()); + } + let Some(reader) = stream.reader() else { + return Ok(Value::bool(false, head).into_pipeline_data()); + }; + let mut start = Vec::with_capacity(pattern.len()); + reader + .take(pattern.len() as u64) + .read_to_end(&mut start) + .err_span(span)?; + + Ok(Value::bool(start == pattern, head).into_pipeline_data()) + } else { + let arg = Arguments { + pattern, + cell_paths, + }; + operate(starts_with, arg, input, head, engine_state.ctrlc.clone()) + } } fn examples(&self) -> Vec<Example> {