From 6891267b5347f42f1fc53d1f737928ade2449660 Mon Sep 17 00:00:00 2001
From: Ian Manske <ian.manske@pm.me>
Date: Thu, 16 May 2024 23:59:08 +0000
Subject: [PATCH] Support `ByteStream`s in `bytes starts-with` and `bytes
 ends-with` (#12887)

# Description
Restores `bytes starts-with` so that it is able to work with byte
streams once again. For parity/consistency, this PR also adds byte
stream support to `bytes ends-with`.

# User-Facing Changes
- `bytes ends-with` now supports byte streams.

# Tests + Formatting
Re-enabled tests for `bytes starts-with` and added tests for `bytes
ends-with`.
---
 .../tests/commands/bytes/ends_with.rs         | 120 +++++++++++++
 .../nu-cmd-extra/tests/commands/bytes/mod.rs  |   1 +
 .../tests/commands/bytes/starts_with.rs       | 160 +++++++++---------
 crates/nu-command/src/bytes/ends_with.rs      |  54 +++++-
 crates/nu-command/src/bytes/starts_with.rs    |  36 ++--
 5 files changed, 275 insertions(+), 96 deletions(-)
 create mode 100644 crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs

diff --git a/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs
new file mode 100644
index 0000000000..b90f936b96
--- /dev/null
+++ b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs
@@ -0,0 +1,120 @@
+use nu_test_support::nu;
+
+#[test]
+fn basic_binary_end_with() {
+    let actual = nu!(r#"
+            "hello world" | into binary | bytes ends-with 0x[77 6f 72 6c 64]
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn basic_string_fails() {
+    let actual = nu!(r#"
+            "hello world" | bytes ends-with 0x[77 6f 72 6c 64]
+        "#);
+
+    assert!(actual.err.contains("command doesn't support"));
+    assert_eq!(actual.out, "");
+}
+
+#[test]
+fn short_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101]
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn short_stream_mismatch() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[010203]) 5 | bytes ends-with 0x[010204]
+        "#);
+
+    assert_eq!(actual.out, "false");
+}
+
+#[test]
+fn short_stream_binary_overflow() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101010101]
+        "#);
+
+    assert_eq!(actual.out, "false");
+}
+
+#[test]
+fn long_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes ends-with 0x[010101]
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_binary_overflow() {
+    // `0..32768` includes both endpoints, so it yields 32769 bytes: one more than the stream, no +1 needed
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes ends-with (0..32768 | each {|| 0x[01] } | bytes collect)
+        "#);
+
+    assert_eq!(actual.out, "false");
+}
+
+#[test]
+fn long_stream_binary_exact() {
+    // `0..<8192` excludes the upper bound, so the pattern is exactly as long as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01020304]) 8192 | bytes ends-with (0..<8192 | each {|| 0x[01020304] } | bytes collect)
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_string_exact() {
+    // `0..<8192` excludes the upper bound, so the pattern is exactly as long as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater hell 8192 | bytes ends-with (0..<8192 | each {|| "hell" | into binary } | bytes collect)
+        "#);
+
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_mixed_exact() {
+    // `0..<2048` excludes the upper bound, so each segment matches the stream's and the pattern equals the whole stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build $binseg $strseg)
+        "#);
+
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "true");
+}
+
+#[test]
+fn long_stream_mixed_overflow() {
+    // `0..<2048` excludes the upper bound; the extra leading 0x[01] makes the pattern one byte longer than the stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build 0x[01] $binseg $strseg)
+        "#);
+
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "false");
+}
diff --git a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs
index b5517bdacb..a8a241eec0 100644
--- a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs
+++ b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs
@@ -1 +1,2 @@
+mod ends_with;
 mod starts_with;
diff --git a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs
index c3ad1ec448..e7d57698b5 100644
--- a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs
+++ b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs
@@ -19,102 +19,102 @@ fn basic_string_fails() {
     assert_eq!(actual.out, "");
 }
 
-// #[test]
-// fn short_stream_binary() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101]
-//         "#);
+#[test]
+fn short_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101]
+        "#);
 
-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}
 
-// #[test]
-// fn short_stream_mismatch() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204]
-//         "#);
+#[test]
+fn short_stream_mismatch() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204]
+        "#);
 
-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(actual.out, "false");
+}
 
-// #[test]
-// fn short_stream_binary_overflow() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101]
-//         "#);
+#[test]
+fn short_stream_binary_overflow() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101]
+        "#);
 
-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(actual.out, "false");
+}
 
-// #[test]
-// fn long_stream_binary() {
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101]
-//         "#);
+#[test]
+fn long_stream_binary() {
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101]
+        "#);
 
-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}
 
-// #[test]
-// fn long_stream_binary_overflow() {
-//     // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect)
-//         "#);
+#[test]
+fn long_stream_binary_overflow() {
+    // `0..32768` includes both endpoints, so it yields 32769 bytes: one more than the stream, no +1 needed
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect)
+        "#);
 
-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(actual.out, "false");
+}
 
-// #[test]
-// fn long_stream_binary_exact() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect)
-//         "#);
+#[test]
+fn long_stream_binary_exact() {
+    // `0..<8192` excludes the upper bound, so the pattern is exactly as long as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect)
+        "#);
 
-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}
 
-// #[test]
-// fn long_stream_string_exact() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect)
-//         "#);
+#[test]
+fn long_stream_string_exact() {
+    // `0..<8192` excludes the upper bound, so the pattern is exactly as long as the stream
+    let actual = nu!(r#"
+            nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect)
+        "#);
 
-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(actual.out, "true");
+}
 
-// #[test]
-// fn long_stream_mixed_exact() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
-//             let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+#[test]
+fn long_stream_mixed_exact() {
+    // `0..<2048` excludes the upper bound, so each segment matches the stream's and the pattern equals the whole stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
 
-//             nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg)
-//         "#);
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg)
+        "#);
 
-//     assert_eq!(
-//         actual.err, "",
-//         "invocation failed. command line limit likely reached"
-//     );
-//     assert_eq!(actual.out, "true");
-// }
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "true");
+}
 
-// #[test]
-// fn long_stream_mixed_overflow() {
-//     // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow
-//     let actual = nu!(r#"
-//             let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
-//             let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
+#[test]
+fn long_stream_mixed_overflow() {
+    // `0..<2048` excludes the upper bound; the extra trailing 0x[01] makes the pattern one byte longer than the stream
+    let actual = nu!(r#"
+            let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect)
+            let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect)
 
-//             nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01])
-//         "#);
+            nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01])
+        "#);
 
-//     assert_eq!(
-//         actual.err, "",
-//         "invocation failed. command line limit likely reached"
-//     );
-//     assert_eq!(actual.out, "false");
-// }
+    assert_eq!(
+        actual.err, "",
+        "invocation failed. command line limit likely reached"
+    );
+    assert_eq!(actual.out, "false");
+}
diff --git a/crates/nu-command/src/bytes/ends_with.rs b/crates/nu-command/src/bytes/ends_with.rs
index ef0389db0c..d6174a189c 100644
--- a/crates/nu-command/src/bytes/ends_with.rs
+++ b/crates/nu-command/src/bytes/ends_with.rs
@@ -1,5 +1,9 @@
 use nu_cmd_base::input_handler::{operate, CmdArgument};
 use nu_engine::command_prelude::*;
+use std::{
+    collections::VecDeque,
+    io::{self, BufRead},
+};
 
 struct Arguments {
     pattern: Vec<u8>,
@@ -52,14 +56,54 @@ impl Command for BytesEndsWith {
         call: &Call,
         input: PipelineData,
     ) -> Result<PipelineData, ShellError> {
+        let head = call.head;
         let pattern: Vec<u8> = call.req(engine_state, stack, 0)?;
         let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
         let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
-        let arg = Arguments {
-            pattern,
-            cell_paths,
-        };
-        operate(ends_with, arg, input, call.head, engine_state.ctrlc.clone())
+
+        if let PipelineData::ByteStream(stream, ..) = input {
+            let span = stream.span();
+            if pattern.is_empty() {
+                return Ok(Value::bool(true, head).into_pipeline_data());
+            }
+            let Some(mut reader) = stream.reader() else {
+                return Ok(Value::bool(false, head).into_pipeline_data());
+            };
+            let cap = pattern.len();
+            let mut end = VecDeque::<u8>::with_capacity(cap);
+            loop {
+                let buf = match reader.fill_buf() {
+                    Ok(&[]) => break,
+                    Ok(buf) => buf,
+                    Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                    Err(e) => return Err(e.into_spanned(span).into()),
+                };
+                let len = buf.len();
+                if len >= cap {
+                    end.clear();
+                    end.extend(&buf[(len - cap)..])
+                } else {
+                    let new_len = len + end.len();
+                    if new_len > cap {
+                        // The `drain` below will panic if `(new_len - cap) > end.len()`.
+                        // But this cannot happen since we know `len < cap` (as checked above):
+                        //   (len + end.len() - cap) > end.len()
+                        //   => (len - cap) > 0
+                        //   => len > cap
+                        end.drain(..(new_len - cap));
+                    }
+                    end.extend(buf);
+                }
+                reader.consume(len);
+            }
+            Ok(Value::bool(end == pattern, head).into_pipeline_data())
+        } else {
+            let arg = Arguments {
+                pattern,
+                cell_paths,
+            };
+            operate(ends_with, arg, input, head, engine_state.ctrlc.clone())
+        }
     }
 
     fn examples(&self) -> Vec<Example> {
diff --git a/crates/nu-command/src/bytes/starts_with.rs b/crates/nu-command/src/bytes/starts_with.rs
index 2d7ca3e26a..92cc16f02c 100644
--- a/crates/nu-command/src/bytes/starts_with.rs
+++ b/crates/nu-command/src/bytes/starts_with.rs
@@ -1,5 +1,6 @@
 use nu_cmd_base::input_handler::{operate, CmdArgument};
 use nu_engine::command_prelude::*;
+use std::io::Read;
 
 struct Arguments {
     pattern: Vec<u8>,
@@ -53,20 +54,33 @@ impl Command for BytesStartsWith {
         call: &Call,
         input: PipelineData,
     ) -> Result<PipelineData, ShellError> {
+        let head = call.head;
         let pattern: Vec<u8> = call.req(engine_state, stack, 0)?;
         let cell_paths: Vec<CellPath> = call.rest(engine_state, stack, 1)?;
         let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths);
-        let arg = Arguments {
-            pattern,
-            cell_paths,
-        };
-        operate(
-            starts_with,
-            arg,
-            input,
-            call.head,
-            engine_state.ctrlc.clone(),
-        )
+
+        if let PipelineData::ByteStream(stream, ..) = input {
+            let span = stream.span();
+            if pattern.is_empty() {
+                return Ok(Value::bool(true, head).into_pipeline_data());
+            }
+            let Some(reader) = stream.reader() else {
+                return Ok(Value::bool(false, head).into_pipeline_data());
+            };
+            let mut start = Vec::with_capacity(pattern.len());
+            reader
+                .take(pattern.len() as u64)
+                .read_to_end(&mut start)
+                .err_span(span)?;
+
+            Ok(Value::bool(start == pattern, head).into_pipeline_data())
+        } else {
+            let arg = Arguments {
+                pattern,
+                cell_paths,
+            };
+            operate(starts_with, arg, input, head, engine_state.ctrlc.clone())
+        }
     }
 
     fn examples(&self) -> Vec<Example> {