From b68c7cf3fac9b5510c600e08ff70b6af0ec6adb3 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:36:38 -0700 Subject: [PATCH 1/8] Make `polars unpivot` consistent with `polars pivot` (#13335) # Description Makes `polars unpivot` use the same arguments as `polars pivot` and makes it consistent with the Polars Rust API. Additionally, support for the Polars streaming engine has been exposed on eager dataframes. Previously, it would only work with lazy dataframes. # User-Facing Changes * `polars unpivot` argument `--columns`|`-c` has been renamed to `--index`|`-i` * `polars unpivot` argument `--values`|`-v` has been renamed to `--on`|`-o` * `polars unpivot` short argument for `--streamable` is now `-t` to make it consistent with `polars pivot`. It was made `-t` for `polars pivot` because `-s` is short for `--short` --- .../src/dataframe/eager/unpivot.rs | 74 ++++++++----------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs b/crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs index c535b54c0e..dafdc65ab4 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/unpivot.rs @@ -30,16 +30,16 @@ impl PluginCommand for UnpivotDF { fn signature(&self) -> Signature { Signature::build(self.name()) .required_named( - "columns", + "index", SyntaxShape::Table(vec![]), "column names for unpivoting", - Some('c'), + Some('i'), ) .required_named( - "values", + "on", SyntaxShape::Table(vec![]), "column names used as value columns", - Some('v'), + Some('o'), ) .named( "variable-name", @@ -60,7 +60,7 @@ impl PluginCommand for UnpivotDF { .switch( "streamable", "Whether or not to use the polars streaming engine. 
Only valid for lazy dataframes", - Some('s'), + Some('t'), ) .category(Category::Custom("dataframe".into())) } @@ -70,7 +70,7 @@ impl PluginCommand for UnpivotDF { Example { description: "unpivot on an eager dataframe", example: - "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars unpivot -c [b c] -v [a d]", + "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-df | polars unpivot -i [b c] -o [a d]", result: Some( NuDataFrame::try_from_columns(vec![ Column::new( @@ -125,7 +125,7 @@ impl PluginCommand for UnpivotDF { Example { description: "unpivot on a lazy dataframe", example: - "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-lazy | polars unpivot -c [b c] -v [a d] | polars collect", + "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | polars into-lazy | polars unpivot -i [b c] -o [a d] | polars collect", result: Some( NuDataFrame::try_from_columns(vec![ Column::new( @@ -208,21 +208,31 @@ fn command_eager( call: &EvaluatedCall, df: NuDataFrame, ) -> Result { - let id_col: Vec = call.get_flag("columns")?.expect("required value"); - let val_col: Vec = call.get_flag("values")?.expect("required value"); + let index_col: Vec = call.get_flag("index")?.expect("required value"); + let on_col: Vec = call.get_flag("on")?.expect("required value"); let value_name: Option> = call.get_flag("value-name")?; let variable_name: Option> = call.get_flag("variable-name")?; - let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?; - let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; + let (index_col_string, index_col_span) = convert_columns_string(index_col, call.head)?; + let (on_col_string, on_col_span) = convert_columns_string(on_col, call.head)?; - check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?; - check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; + check_column_datatypes(df.as_ref(), &index_col_string, index_col_span)?; + check_column_datatypes(df.as_ref(), 
&on_col_string, on_col_span)?; - let mut res = df + let streamable = call.has_flag("streamable")?; + + let args = UnpivotArgs { + on: on_col_string.iter().map(Into::into).collect(), + index: index_col_string.iter().map(Into::into).collect(), + variable_name: variable_name.map(|s| s.item.into()), + value_name: value_name.map(|s| s.item.into()), + streamable, + }; + + let res = df .as_ref() - .unpivot(&val_col_string, &id_col_string) + .unpivot2(args) .map_err(|e| ShellError::GenericError { error: "Error calculating unpivot".into(), msg: e.to_string(), @@ -231,28 +241,6 @@ fn command_eager( inner: vec![], })?; - if let Some(name) = &variable_name { - res.rename("variable", &name.item) - .map_err(|e| ShellError::GenericError { - error: "Error renaming column".into(), - msg: e.to_string(), - span: Some(name.span), - help: None, - inner: vec![], - })?; - } - - if let Some(name) = &value_name { - res.rename("value", &name.item) - .map_err(|e| ShellError::GenericError { - error: "Error renaming column".into(), - msg: e.to_string(), - span: Some(name.span), - help: None, - inner: vec![], - })?; - } - let res = NuDataFrame::new(false, res); res.to_pipeline_data(plugin, engine, call.head) } @@ -263,11 +251,11 @@ fn command_lazy( call: &EvaluatedCall, df: NuLazyFrame, ) -> Result { - let id_col: Vec = call.get_flag("columns")?.expect("required value"); - let val_col: Vec = call.get_flag("values")?.expect("required value"); + let index_col: Vec = call.get_flag("index")?.expect("required value"); + let on_col: Vec = call.get_flag("on")?.expect("required value"); - let (id_col_string, _id_col_span) = convert_columns_string(id_col, call.head)?; - let (val_col_string, _val_col_span) = convert_columns_string(val_col, call.head)?; + let (index_col_string, _index_col_span) = convert_columns_string(index_col, call.head)?; + let (on_col_string, _on_col_span) = convert_columns_string(on_col, call.head)?; let value_name: Option = call.get_flag("value-name")?; let variable_name: Option = 
call.get_flag("variable-name")?; @@ -275,8 +263,8 @@ fn command_lazy( let streamable = call.has_flag("streamable")?; let unpivot_args = UnpivotArgs { - on: val_col_string.iter().map(Into::into).collect(), - index: id_col_string.iter().map(Into::into).collect(), + on: on_col_string.iter().map(Into::into).collect(), + index: index_col_string.iter().map(Into::into).collect(), value_name: value_name.map(Into::into), variable_name: variable_name.map(Into::into), streamable, From 616e9faaf13ccd90fb898008b5921c235893357e Mon Sep 17 00:00:00 2001 From: 132ikl <132@ikl.sh> Date: Wed, 10 Jul 2024 19:05:24 -0400 Subject: [PATCH 2/8] Fix main binary being rebuilt when nothing has changed (#13337) # Description The build script is currently re-run on each `cargo build` even when it has not changed. The `rerun-if-changed` line points to `/build.rs`, but `build.rs` is actually located at `/scripts/build.rs`. This updates that path. # User-Facing Changes N/A # Tests + Formatting N/A --- scripts/build.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build.rs b/scripts/build.rs index beabba80dc..e366b4998c 100644 --- a/scripts/build.rs +++ b/scripts/build.rs @@ -14,5 +14,5 @@ fn main() { // Tango uses dynamic linking, to allow us to dynamically change between two bench suit at runtime. // This is currently not supported on non nightly rust, on windows. println!("cargo:rustc-link-arg-benches=-rdynamic"); - println!("cargo:rerun-if-changed=build.rs"); + println!("cargo:rerun-if-changed=scripts/build.rs"); } From ea8c4e3af28d778d98fd570800c74a3e19cddf29 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Wed, 10 Jul 2024 16:16:22 -0700 Subject: [PATCH 3/8] Make pipe redirections consistent, add `err>|` etc. forms (#13334) # Description Fixes the lexer to recognize `out>|`, `err>|`, `out+err>|`, etc. Previously only the short-style forms were recognized, which was inconsistent with normal file redirections. 
I also integrated it all more into the normal lex path by checking `|` in a special way, which should be more performant and consistent, and cleans up the code a bunch. Closes #13331. # User-Facing Changes - Adds `out>|` (error), `err>|`, `out+err>|`, `err+out>|` as recognized forms of the pipe redirection. # Tests + Formatting All passing. Added tests for the new forms. # After Submitting - [ ] release notes --- crates/nu-parser/src/lex.rs | 103 ++++++---------------- tests/shell/pipeline/commands/external.rs | 23 +++-- 2 files changed, 44 insertions(+), 82 deletions(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index 9b52467ef4..3290a774f4 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -238,6 +238,10 @@ pub fn lex_item( Some(e), ); } + } else if c == b'|' && is_redirection(&input[token_start..*curr_offset]) { + // matches err>| etc. + *curr_offset += 1; + break; } else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) { break; } @@ -301,6 +305,16 @@ pub fn lex_item( contents: TokenContents::OutGreaterGreaterThan, span, }, + b"out>|" | b"o>|" => { + err = Some(ParseError::Expected( + "`|`. 
Redirecting stdout to a pipe is the same as normal piping.", + span, + )); + Token { + contents: TokenContents::Item, + span, + } + } b"err>" | b"e>" => Token { contents: TokenContents::ErrGreaterThan, span, @@ -309,6 +323,10 @@ pub fn lex_item( contents: TokenContents::ErrGreaterGreaterThan, span, }, + b"err>|" | b"e>|" => Token { + contents: TokenContents::ErrGreaterPipe, + span, + }, b"out+err>" | b"err+out>" | b"o+e>" | b"e+o>" => Token { contents: TokenContents::OutErrGreaterThan, span, @@ -317,6 +335,10 @@ pub fn lex_item( contents: TokenContents::OutErrGreaterGreaterThan, span, }, + b"out+err>|" | b"err+out>|" | b"o+e>|" | b"e+o>|" => Token { + contents: TokenContents::OutErrGreaterPipe, + span, + }, b"&&" => { err = Some(ParseError::ShellAndAnd(span)); Token { @@ -580,17 +602,6 @@ fn lex_internal( // If the next character is non-newline whitespace, skip it. curr_offset += 1; } else { - let (token, err) = try_lex_special_piped_item(input, &mut curr_offset, span_offset); - if error.is_none() { - error = err; - } - if let Some(token) = token { - output.push(token); - is_complete = false; - continue; - } - - // Otherwise, try to consume an unclassified token. let (token, err) = lex_item( input, &mut curr_offset, @@ -609,68 +620,10 @@ fn lex_internal( (output, error) } -/// trying to lex for the following item: -/// e>|, e+o>|, o+e>| -/// -/// It returns Some(token) if we find the item, or else return None. 
-fn try_lex_special_piped_item( - input: &[u8], - curr_offset: &mut usize, - span_offset: usize, -) -> (Option, Option) { - let c = input[*curr_offset]; - let e_pipe_len = 3; - let eo_pipe_len = 5; - let o_pipe_len = 3; - let offset = *curr_offset; - if c == b'e' { - // expect `e>|` - if (offset + e_pipe_len <= input.len()) && (&input[offset..offset + e_pipe_len] == b"e>|") { - *curr_offset += e_pipe_len; - return ( - Some(Token::new( - TokenContents::ErrGreaterPipe, - Span::new(span_offset + offset, span_offset + offset + e_pipe_len), - )), - None, - ); - } - if (offset + eo_pipe_len <= input.len()) - && (&input[offset..offset + eo_pipe_len] == b"e+o>|") - { - *curr_offset += eo_pipe_len; - return ( - Some(Token::new( - TokenContents::OutErrGreaterPipe, - Span::new(span_offset + offset, span_offset + offset + eo_pipe_len), - )), - None, - ); - } - } else if c == b'o' { - // indicates an error if user happened to type `o>|` - if offset + o_pipe_len <= input.len() && (&input[offset..offset + o_pipe_len] == b"o>|") { - return ( - None, - Some(ParseError::Expected( - "`|`. Redirecting stdout to a pipe is the same as normal piping.", - Span::new(span_offset + offset, span_offset + offset + o_pipe_len), - )), - ); - } - // it can be the following case: `o+e>|` - if (offset + eo_pipe_len <= input.len()) - && (&input[offset..offset + eo_pipe_len] == b"o+e>|") - { - *curr_offset += eo_pipe_len; - return ( - Some(Token::new( - TokenContents::OutErrGreaterPipe, - Span::new(span_offset + offset, span_offset + offset + eo_pipe_len), - )), - None, - ); - } - } - (None, None) +/// True if this is the start of a redirection. Does not match `>>` or `>|` forms. 
+fn is_redirection(token: &[u8]) -> bool { + matches!( + token, + b"o>" | b"out>" | b"e>" | b"err>" | b"o+e>" | b"e+o>" | b"out+err>" | b"err+out>" + ) } diff --git a/tests/shell/pipeline/commands/external.rs b/tests/shell/pipeline/commands/external.rs index d138b65766..a6efe2b6c9 100644 --- a/tests/shell/pipeline/commands/external.rs +++ b/tests/shell/pipeline/commands/external.rs @@ -149,17 +149,26 @@ fn command_substitution_wont_output_extra_newline() { assert_eq!(actual.out, "bar"); } -#[test] -fn basic_err_pipe_works() { - let actual = - nu!(r#"with-env { FOO: "bar" } { nu --testbin echo_env_stderr FOO e>| str length }"#); +#[rstest::rstest] +#[case("err>|")] +#[case("e>|")] +fn basic_err_pipe_works(#[case] redirection: &str) { + let actual = nu!( + r#"with-env { FOO: "bar" } { nu --testbin echo_env_stderr FOO {redirection} str length }"# + .replace("{redirection}", redirection) + ); assert_eq!(actual.out, "3"); } -#[test] -fn basic_outerr_pipe_works() { +#[rstest::rstest] +#[case("out+err>|")] +#[case("err+out>|")] +#[case("o+e>|")] +#[case("e+o>|")] +fn basic_outerr_pipe_works(#[case] redirection: &str) { let actual = nu!( - r#"with-env { FOO: "bar" } { nu --testbin echo_env_mixed out-err FOO FOO o+e>| str length }"# + r#"with-env { FOO: "bar" } { nu --testbin echo_env_mixed out-err FOO FOO {redirection} str length }"# + .replace("{redirection}", redirection) ); assert_eq!(actual.out, "7"); } From d7392f1f3b9b8ffb58bc5f6f00d4de02b149a23d Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Wed, 10 Jul 2024 17:33:59 -0700 Subject: [PATCH 4/8] Internal representation (IR) compiler and evaluator (#13330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR adds an internal representation language to Nushell, offering an alternative evaluator based on simple instructions, stream-containing registers, and indexed control flow. 
The number of registers required is determined statically at compile-time, and the fixed size required is allocated upon entering the block. Each instruction is associated with a span, which makes going backwards from IR instructions to source code very easy. Motivations for IR: 1. **Performance.** By simplifying the evaluation path and making it more cache-friendly and branch predictor-friendly, code that does a lot of computation in Nushell itself can be sped up a decent bit. Because the IR is fairly easy to reason about, we can also implement optimization passes in the future to eliminate and simplify code. 2. **Correctness.** The instructions mostly have very simple and easily-specified behavior, so hopefully engine changes are a little bit easier to reason about, and they can be specified in a more formal way at some point. I have made an effort to document each of the instructions in the docs for the enum itself in a reasonably specific way. Some of the errors that would have happened during evaluation before are now moved to the compilation step instead, because they don't make sense to check during evaluation. 3. **As an intermediate target.** This is a good step for us to bring the [`new-nu-parser`](https://github.com/nushell/new-nu-parser) in at some point, as code generated from new AST can be directly compared to code generated from old AST. If the IR code is functionally equivalent, it will behave the exact same way. 4. **Debugging.** With a little bit more work, we can probably give control over advancing the virtual machine that `IrBlock`s run on to some sort of external driver, making things like breakpoints and single stepping possible. Tools like `view ir` and [`explore ir`](https://github.com/devyn/nu_plugin_explore_ir) make it easier than before to see what exactly is going on with your Nushell code. The goal is to eventually replace the AST evaluator entirely, once we're sure it's working just as well. 
You can help dogfood this by running Nushell with `$env.NU_USE_IR` set to some value. The environment variable is checked when Nushell starts, so config runs with IR, or it can also be set on a line at the REPL to change it dynamically. It is also checked when running `do`, in case you want to run just a specific piece of code within a script with or without IR. # Example ```nushell view ir { |data| mut sum = 0 for n in $data { $sum += $n } $sum } ``` ```gas # 3 registers, 19 instructions, 0 bytes of data 0: load-literal %0, int(0) 1: store-variable var 904, %0 # let 2: drain %0 3: drop %0 4: load-variable %1, var 903 5: iterate %0, %1, end 15 # for, label(1), from(14:) 6: store-variable var 905, %0 7: load-variable %0, var 904 8: load-variable %2, var 905 9: binary-op %0, Math(Plus), %2 10: span %0 11: store-variable var 904, %0 12: load-literal %0, nothing 13: drain %0 14: jump 5 15: drop %0 # label(0), from(5:) 16: drain %0 17: load-variable %0, var 904 18: return %0 ``` # Benchmarks All benchmarks were run on a base model Mac Mini M1. ## Iterative Fibonacci sequence This is about as close to a best case as possible, making use of the much faster control flow. Most code will not experience a speed improvement nearly this large. 
```nushell def fib [n: int] { mut a = 0 mut b = 1 for _ in 2..=$n { let c = $a + $b $a = $b $b = $c } $b } use std bench bench { 0..50 | each { |n| fib $n } } ``` IR disabled: ``` ╭───────┬─────────────────╮ │ mean │ 1ms 924µs 665ns │ │ min │ 1ms 700µs 83ns │ │ max │ 3ms 450µs 125ns │ │ std │ 395µs 759ns │ │ times │ [list 50 items] │ ╰───────┴─────────────────╯ ``` IR enabled: ``` ╭───────┬─────────────────╮ │ mean │ 452µs 820ns │ │ min │ 427µs 417ns │ │ max │ 540µs 167ns │ │ std │ 17µs 158ns │ │ times │ [list 50 items] │ ╰───────┴─────────────────╯ ``` ![explore ir view](https://github.com/nushell/nushell/assets/10729/d7bccc03-5222-461c-9200-0dce71b83b83) ## [gradient_benchmark_no_check.nu](https://github.com/nushell/nu_scripts/blob/main/benchmarks/gradient_benchmark_no_check.nu) IR disabled: ``` ╭───┬──────────────────╮ │ 0 │ 27ms 929µs 958ns │ │ 1 │ 21ms 153µs 459ns │ │ 2 │ 18ms 639µs 666ns │ │ 3 │ 19ms 554µs 583ns │ │ 4 │ 13ms 383µs 375ns │ │ 5 │ 11ms 328µs 208ns │ │ 6 │ 5ms 659µs 542ns │ ╰───┴──────────────────╯ ``` IR enabled: ``` ╭───┬──────────────────╮ │ 0 │ 22ms 662µs │ │ 1 │ 17ms 221µs 792ns │ │ 2 │ 14ms 786µs 708ns │ │ 3 │ 13ms 876µs 834ns │ │ 4 │ 13ms 52µs 875ns │ │ 5 │ 11ms 269µs 666ns │ │ 6 │ 6ms 942µs 500ns │ ╰───┴──────────────────╯ ``` ## [random-bytes.nu](https://github.com/nushell/nu_scripts/blob/main/benchmarks/random-bytes.nu) I got pretty random results out of this benchmark so I decided not to include it. Not clear why. # User-Facing Changes - IR compilation errors may appear even if the user isn't evaluating with IR. - IR evaluation can be enabled by setting the `NU_USE_IR` environment variable to any value. - New command `view ir` pretty-prints the IR for a block, and `view ir --json` can be piped into an external tool like [`explore ir`](https://github.com/devyn/nu_plugin_explore_ir). # Tests + Formatting All tests are passing with `NU_USE_IR=1`, and I've added some more eval tests to compare the results for some very core operations. 
I will probably want to add some more so we don't have to always check `NU_USE_IR=1 toolkit test --workspace` on a regular basis. # After Submitting - [ ] release notes - [ ] further documentation of instructions? - [ ] post-release: publish `nu_plugin_explore_ir` --- Cargo.lock | 3 + Cargo.toml | 3 +- benches/benchmarks.rs | 4 + .../nu-cli/src/commands/keybindings_list.rs | 28 +- crates/nu-cli/src/eval_cmds.rs | 5 + crates/nu-cli/src/repl.rs | 3 + crates/nu-cli/src/util.rs | 5 + .../nu-cmd-lang/src/core_commands/const_.rs | 3 + crates/nu-cmd-lang/src/core_commands/do_.rs | 4 + crates/nu-cmd-lang/src/core_commands/for_.rs | 3 + crates/nu-cmd-lang/src/core_commands/if_.rs | 6 + crates/nu-cmd-lang/src/core_commands/let_.rs | 3 + crates/nu-cmd-lang/src/core_commands/loop_.rs | 3 + .../nu-cmd-lang/src/core_commands/match_.rs | 3 + crates/nu-cmd-lang/src/core_commands/mut_.rs | 3 + .../src/core_commands/overlay/use_.rs | 8 +- crates/nu-cmd-lang/src/core_commands/try_.rs | 3 + crates/nu-cmd-lang/src/core_commands/use_.rs | 7 +- .../nu-cmd-lang/src/core_commands/while_.rs | 3 + crates/nu-command/src/bytes/build_.rs | 6 +- crates/nu-command/src/debug/explain.rs | 4 +- crates/nu-command/src/debug/metadata.rs | 6 +- crates/nu-command/src/debug/mod.rs | 2 + crates/nu-command/src/debug/timeit.rs | 14 +- crates/nu-command/src/debug/view_ir.rs | 83 + crates/nu-command/src/default_context.rs | 1 + crates/nu-command/src/env/export_env.rs | 6 +- crates/nu-command/src/env/source_env.rs | 2 +- crates/nu-command/src/filesystem/du.rs | 2 +- crates/nu-command/src/filesystem/ls.rs | 2 +- crates/nu-command/src/filesystem/open.rs | 7 +- crates/nu-command/src/filesystem/save.rs | 64 +- crates/nu-command/src/filesystem/util.rs | 39 +- crates/nu-command/src/filters/transpose.rs | 10 +- crates/nu-command/src/filters/utils.rs | 3 +- crates/nu-command/src/generators/cal.rs | 11 +- crates/nu-command/src/math/utils.rs | 2 +- crates/nu-command/src/platform/ansi/ansi_.rs | 27 +- 
crates/nu-command/src/platform/is_terminal.rs | 2 +- crates/nu-command/src/platform/kill.rs | 17 +- .../src/strings/encode_decode/base64.rs | 4 +- crates/nu-command/src/strings/mod.rs | 3 +- crates/nu-command/src/system/nu_check.rs | 2 +- crates/nu-command/src/system/run_external.rs | 68 +- crates/nu-command/src/system/uname.rs | 9 +- crates/nu-command/src/viewers/table.rs | 4 +- crates/nu-engine/Cargo.toml | 3 +- crates/nu-engine/src/call_ext.rs | 219 ++- crates/nu-engine/src/command_prelude.rs | 4 +- crates/nu-engine/src/compile/builder.rs | 575 +++++++ crates/nu-engine/src/compile/call.rs | 270 +++ crates/nu-engine/src/compile/expression.rs | 535 ++++++ crates/nu-engine/src/compile/keyword.rs | 902 ++++++++++ crates/nu-engine/src/compile/mod.rs | 204 +++ crates/nu-engine/src/compile/operator.rs | 378 +++++ crates/nu-engine/src/compile/redirect.rs | 157 ++ crates/nu-engine/src/documentation.rs | 9 +- crates/nu-engine/src/env.rs | 21 +- crates/nu-engine/src/eval.rs | 16 +- crates/nu-engine/src/eval_helpers.rs | 16 +- crates/nu-engine/src/eval_ir.rs | 1462 +++++++++++++++++ crates/nu-engine/src/lib.rs | 4 + crates/nu-parser/src/known_external.rs | 204 ++- crates/nu-parser/src/parse_patterns.rs | 2 +- crates/nu-parser/src/parser.rs | 26 +- crates/nu-parser/tests/test_parser.rs | 9 +- crates/nu-plugin-engine/src/context.rs | 11 +- .../nu-plugin-protocol/src/evaluated_call.rs | 40 +- crates/nu-protocol/Cargo.toml | 3 +- crates/nu-protocol/src/alias.rs | 4 +- crates/nu-protocol/src/ast/block.rs | 11 +- crates/nu-protocol/src/ast/expr.rs | 14 +- crates/nu-protocol/src/ast/match_pattern.rs | 6 +- crates/nu-protocol/src/ast/pipeline.rs | 14 +- crates/nu-protocol/src/engine/argument.rs | 124 ++ crates/nu-protocol/src/engine/call.rs | 223 +++ crates/nu-protocol/src/engine/command.rs | 8 +- .../nu-protocol/src/engine/error_handler.rs | 55 + crates/nu-protocol/src/engine/mod.rs | 6 + crates/nu-protocol/src/engine/stack.rs | 22 +- .../src/engine/state_working_set.rs | 12 +- 
crates/nu-protocol/src/errors/cli_error.rs | 4 + .../nu-protocol/src/errors/compile_error.rs | 238 +++ crates/nu-protocol/src/errors/mod.rs | 2 + crates/nu-protocol/src/errors/shell_error.rs | 17 + crates/nu-protocol/src/eval_const.rs | 2 +- crates/nu-protocol/src/id.rs | 14 + crates/nu-protocol/src/ir/call.rs | 351 ++++ crates/nu-protocol/src/ir/display.rs | 452 +++++ crates/nu-protocol/src/ir/mod.rs | 419 +++++ crates/nu-protocol/src/lib.rs | 1 + .../nu-protocol/src/pipeline/byte_stream.rs | 6 + .../nu-protocol/src/pipeline/list_stream.rs | 11 + .../nu-protocol/src/pipeline/pipeline_data.rs | 29 +- crates/nu-protocol/src/signature.rs | 3 +- crates/nu-protocol/src/span.rs | 16 + crates/nu-test-support/src/macros.rs | 11 + src/run.rs | 12 + tests/eval/mod.rs | 457 +++++- 99 files changed, 7768 insertions(+), 346 deletions(-) create mode 100644 crates/nu-command/src/debug/view_ir.rs create mode 100644 crates/nu-engine/src/compile/builder.rs create mode 100644 crates/nu-engine/src/compile/call.rs create mode 100644 crates/nu-engine/src/compile/expression.rs create mode 100644 crates/nu-engine/src/compile/keyword.rs create mode 100644 crates/nu-engine/src/compile/mod.rs create mode 100644 crates/nu-engine/src/compile/operator.rs create mode 100644 crates/nu-engine/src/compile/redirect.rs create mode 100644 crates/nu-engine/src/eval_ir.rs create mode 100644 crates/nu-protocol/src/engine/argument.rs create mode 100644 crates/nu-protocol/src/engine/call.rs create mode 100644 crates/nu-protocol/src/engine/error_handler.rs create mode 100644 crates/nu-protocol/src/errors/compile_error.rs create mode 100644 crates/nu-protocol/src/ir/call.rs create mode 100644 crates/nu-protocol/src/ir/display.rs create mode 100644 crates/nu-protocol/src/ir/mod.rs diff --git a/Cargo.lock b/Cargo.lock index d4f759aed0..7243a801fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2900,6 +2900,7 @@ dependencies = [ "openssl", "pretty_assertions", "reedline", + "regex", "rstest", "serde_json", 
"serial_test", @@ -3151,6 +3152,7 @@ dependencies = [ name = "nu-engine" version = "0.95.1" dependencies = [ + "log", "nu-glob", "nu-path", "nu-protocol", @@ -3339,6 +3341,7 @@ dependencies = [ "convert_case", "fancy-regex", "indexmap", + "log", "lru", "miette", "nix", diff --git a/Cargo.toml b/Cargo.toml index 5701fe3958..eb8a92c630 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -232,6 +232,7 @@ assert_cmd = "2.0" dirs-next = { workspace = true } tango-bench = "0.5" pretty_assertions = { workspace = true } +regex = { workspace = true } rstest = { workspace = true, default-features = false } serial_test = "3.1" tempfile = { workspace = true } @@ -310,4 +311,4 @@ reedline = { git = "https://github.com/nushell/reedline", branch = "main" } # Run individual benchmarks like `cargo bench -- ` e.g. `cargo bench -- parse` [[bench]] name = "benchmarks" -harness = false \ No newline at end of file +harness = false diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index ea296f0d06..4efb666507 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -45,6 +45,10 @@ fn setup_stack_and_engine_from_command(command: &str) -> (Stack, EngineState) { }; let mut stack = Stack::new(); + + // Support running benchmarks with IR mode + stack.use_ir = std::env::var_os("NU_USE_IR").is_some(); + evaluate_commands( &commands, &mut engine, diff --git a/crates/nu-cli/src/commands/keybindings_list.rs b/crates/nu-cli/src/commands/keybindings_list.rs index f4450c0c23..350df7b820 100644 --- a/crates/nu-cli/src/commands/keybindings_list.rs +++ b/crates/nu-cli/src/commands/keybindings_list.rs @@ -49,22 +49,24 @@ impl Command for KeybindingsList { fn run( &self, - _engine_state: &EngineState, - _stack: &mut Stack, + engine_state: &EngineState, + stack: &mut Stack, call: &Call, _input: PipelineData, ) -> Result { - let records = if call.named_len() == 0 { - let all_options = ["modifiers", "keycodes", "edits", "modes", "events"]; - all_options - .iter() - .flat_map(|argument| 
get_records(argument, call.head)) - .collect() - } else { - call.named_iter() - .flat_map(|(argument, _, _)| get_records(argument.item.as_str(), call.head)) - .collect() - }; + let all_options = ["modifiers", "keycodes", "edits", "modes", "events"]; + + let presence = all_options + .iter() + .map(|option| call.has_flag(engine_state, stack, option)) + .collect::, ShellError>>()?; + + let records = all_options + .iter() + .zip(presence) + .filter(|(_, present)| *present) + .flat_map(|(option, _)| get_records(option, call.head)) + .collect(); Ok(Value::list(records, call.head).into_pipeline_data()) } diff --git a/crates/nu-cli/src/eval_cmds.rs b/crates/nu-cli/src/eval_cmds.rs index 13141f6174..ad3a15304d 100644 --- a/crates/nu-cli/src/eval_cmds.rs +++ b/crates/nu-cli/src/eval_cmds.rs @@ -70,6 +70,11 @@ pub fn evaluate_commands( std::process::exit(1); } + if let Some(err) = working_set.compile_errors.first() { + report_error(&working_set, err); + // Not a fatal error, for now + } + (output, working_set.render()) }; diff --git a/crates/nu-cli/src/repl.rs b/crates/nu-cli/src/repl.rs index 7099b70ba8..07272701f3 100644 --- a/crates/nu-cli/src/repl.rs +++ b/crates/nu-cli/src/repl.rs @@ -268,6 +268,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { if let Err(err) = engine_state.merge_env(&mut stack, cwd) { report_error_new(engine_state, &err); } + // Check whether $env.NU_USE_IR is set, so that the user can change it in the REPL + // Temporary while IR eval is optional + stack.use_ir = stack.has_env_var(engine_state, "NU_USE_IR"); perf!("merge env", start_time, use_color); start_time = std::time::Instant::now(); diff --git a/crates/nu-cli/src/util.rs b/crates/nu-cli/src/util.rs index d3cf73056f..bcee53c9b0 100644 --- a/crates/nu-cli/src/util.rs +++ b/crates/nu-cli/src/util.rs @@ -262,6 +262,11 @@ fn evaluate_source( return Ok(Some(1)); } + if let Some(err) = working_set.compile_errors.first() { + report_error(&working_set, err); + // Not a fatal error, 
for now + } + (output, working_set.render()) }; diff --git a/crates/nu-cmd-lang/src/core_commands/const_.rs b/crates/nu-cmd-lang/src/core_commands/const_.rs index f780c5ada9..5b3d03443a 100644 --- a/crates/nu-cmd-lang/src/core_commands/const_.rs +++ b/crates/nu-cmd-lang/src/core_commands/const_.rs @@ -46,6 +46,9 @@ impl Command for Const { call: &Call, _input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; let var_id = if let Some(id) = call.positional_nth(0).and_then(|pos| pos.as_var()) { id } else { diff --git a/crates/nu-cmd-lang/src/core_commands/do_.rs b/crates/nu-cmd-lang/src/core_commands/do_.rs index adf13cc0bb..bf29a2159a 100644 --- a/crates/nu-cmd-lang/src/core_commands/do_.rs +++ b/crates/nu-cmd-lang/src/core_commands/do_.rs @@ -81,6 +81,10 @@ impl Command for Do { bind_args_to(&mut callee_stack, &block.signature, rest, head)?; let eval_block_with_early_return = get_eval_block_with_early_return(engine_state); + + // Applies to all block evaluation once set true + callee_stack.use_ir = caller_stack.has_env_var(engine_state, "NU_USE_IR"); + let result = eval_block_with_early_return(engine_state, &mut callee_stack, block, input); if has_env { diff --git a/crates/nu-cmd-lang/src/core_commands/for_.rs b/crates/nu-cmd-lang/src/core_commands/for_.rs index 1e90e5f06d..36df743e5f 100644 --- a/crates/nu-cmd-lang/src/core_commands/for_.rs +++ b/crates/nu-cmd-lang/src/core_commands/for_.rs @@ -48,6 +48,9 @@ impl Command for For { call: &Call, _input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. 
+ let call = call.assert_ast_call()?; let head = call.head; let var_id = call .positional_nth(0) diff --git a/crates/nu-cmd-lang/src/core_commands/if_.rs b/crates/nu-cmd-lang/src/core_commands/if_.rs index 738d901759..8667843770 100644 --- a/crates/nu-cmd-lang/src/core_commands/if_.rs +++ b/crates/nu-cmd-lang/src/core_commands/if_.rs @@ -60,6 +60,9 @@ impl Command for If { call: &Call, input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; let cond = call.positional_nth(0).expect("checked through parser"); let then_block = call .positional_nth(1) @@ -99,6 +102,9 @@ impl Command for If { call: &Call, input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; let cond = call.positional_nth(0).expect("checked through parser"); let then_block = call .positional_nth(1) diff --git a/crates/nu-cmd-lang/src/core_commands/let_.rs b/crates/nu-cmd-lang/src/core_commands/let_.rs index f2da628c31..46324ef39e 100644 --- a/crates/nu-cmd-lang/src/core_commands/let_.rs +++ b/crates/nu-cmd-lang/src/core_commands/let_.rs @@ -46,6 +46,9 @@ impl Command for Let { call: &Call, input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; let var_id = call .positional_nth(0) .expect("checked through parser") diff --git a/crates/nu-cmd-lang/src/core_commands/loop_.rs b/crates/nu-cmd-lang/src/core_commands/loop_.rs index 86e18389de..f495c8d3ae 100644 --- a/crates/nu-cmd-lang/src/core_commands/loop_.rs +++ b/crates/nu-cmd-lang/src/core_commands/loop_.rs @@ -37,6 +37,9 @@ impl Command for Loop { call: &Call, _input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. 
The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; let head = call.head; let block_id = call .positional_nth(0) diff --git a/crates/nu-cmd-lang/src/core_commands/match_.rs b/crates/nu-cmd-lang/src/core_commands/match_.rs index d28a59cbad..c3a3d61216 100644 --- a/crates/nu-cmd-lang/src/core_commands/match_.rs +++ b/crates/nu-cmd-lang/src/core_commands/match_.rs @@ -43,6 +43,9 @@ impl Command for Match { call: &Call, input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; let value: Value = call.req(engine_state, stack, 0)?; let matches = call .positional_nth(1) diff --git a/crates/nu-cmd-lang/src/core_commands/mut_.rs b/crates/nu-cmd-lang/src/core_commands/mut_.rs index 5db3c929af..b729590027 100644 --- a/crates/nu-cmd-lang/src/core_commands/mut_.rs +++ b/crates/nu-cmd-lang/src/core_commands/mut_.rs @@ -46,6 +46,9 @@ impl Command for Mut { call: &Call, input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. 
+ let call = call.assert_ast_call()?; let var_id = call .positional_nth(0) .expect("checked through parser") diff --git a/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs b/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs index e8b51fb59b..d6d3ae745a 100644 --- a/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs +++ b/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs @@ -65,9 +65,9 @@ impl Command for OverlayUse { name_arg.item = trim_quotes_str(&name_arg.item).to_string(); let maybe_origin_module_id = - if let Some(overlay_expr) = call.get_parser_info("overlay_expr") { + if let Some(overlay_expr) = call.get_parser_info(caller_stack, "overlay_expr") { if let Expr::Overlay(module_id) = &overlay_expr.expr { - module_id + *module_id } else { return Err(ShellError::NushellFailedSpanned { msg: "Not an overlay".to_string(), @@ -110,7 +110,7 @@ impl Command for OverlayUse { // a) adding a new overlay // b) refreshing an active overlay (the origin module changed) - let module = engine_state.get_module(*module_id); + let module = engine_state.get_module(module_id); // Evaluate the export-env block (if any) and keep its environment if let Some(block_id) = module.env_block { @@ -118,7 +118,7 @@ impl Command for OverlayUse { &name_arg.item, engine_state, caller_stack, - get_dirs_var_from_call(call), + get_dirs_var_from_call(caller_stack, call), )?; let block = engine_state.get_block(block_id); diff --git a/crates/nu-cmd-lang/src/core_commands/try_.rs b/crates/nu-cmd-lang/src/core_commands/try_.rs index f99825b88d..2309897a1b 100644 --- a/crates/nu-cmd-lang/src/core_commands/try_.rs +++ b/crates/nu-cmd-lang/src/core_commands/try_.rs @@ -47,6 +47,9 @@ impl Command for Try { call: &Call, input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. 
+ let call = call.assert_ast_call()?; let try_block = call .positional_nth(0) .expect("checked through parser") diff --git a/crates/nu-cmd-lang/src/core_commands/use_.rs b/crates/nu-cmd-lang/src/core_commands/use_.rs index b0f3648304..7f544fa5d4 100644 --- a/crates/nu-cmd-lang/src/core_commands/use_.rs +++ b/crates/nu-cmd-lang/src/core_commands/use_.rs @@ -57,7 +57,7 @@ This command is a parser keyword. For details, check: let Some(Expression { expr: Expr::ImportPattern(import_pattern), .. - }) = call.get_parser_info("import_pattern") + }) = call.get_parser_info(caller_stack, "import_pattern") else { return Err(ShellError::GenericError { error: "Unexpected import".into(), @@ -68,6 +68,9 @@ This command is a parser keyword. For details, check: }); }; + // Necessary so that we can modify the stack. + let import_pattern = import_pattern.clone(); + if let Some(module_id) = import_pattern.head.id { // Add constants for var_id in &import_pattern.constants { @@ -99,7 +102,7 @@ This command is a parser keyword. For details, check: &module_arg_str, engine_state, caller_stack, - get_dirs_var_from_call(call), + get_dirs_var_from_call(caller_stack, call), )?; let maybe_parent = maybe_file_path .as_ref() diff --git a/crates/nu-cmd-lang/src/core_commands/while_.rs b/crates/nu-cmd-lang/src/core_commands/while_.rs index 22bb4c5dbd..a67c47fcab 100644 --- a/crates/nu-cmd-lang/src/core_commands/while_.rs +++ b/crates/nu-cmd-lang/src/core_commands/while_.rs @@ -46,6 +46,9 @@ impl Command for While { call: &Call, _input: PipelineData, ) -> Result { + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. 
+ let call = call.assert_ast_call()?; let head = call.head; let cond = call.positional_nth(0).expect("checked through parser"); let block_id = call diff --git a/crates/nu-command/src/bytes/build_.rs b/crates/nu-command/src/bytes/build_.rs index f6b1327621..9a3599a071 100644 --- a/crates/nu-command/src/bytes/build_.rs +++ b/crates/nu-command/src/bytes/build_.rs @@ -49,10 +49,8 @@ impl Command for BytesBuild { _input: PipelineData, ) -> Result { let mut output = vec![]; - for val in call.rest_iter_flattened(0, |expr| { - let eval_expression = get_eval_expression(engine_state); - eval_expression(engine_state, stack, expr) - })? { + let eval_expression = get_eval_expression(engine_state); + for val in call.rest_iter_flattened(engine_state, stack, eval_expression, 0)? { let val_span = val.span(); match val { Value::Binary { mut val, .. } => output.append(&mut val), diff --git a/crates/nu-command/src/debug/explain.rs b/crates/nu-command/src/debug/explain.rs index b451d6916a..710e37935a 100644 --- a/crates/nu-command/src/debug/explain.rs +++ b/crates/nu-command/src/debug/explain.rs @@ -1,6 +1,6 @@ use nu_engine::{command_prelude::*, get_eval_expression}; use nu_protocol::{ - ast::{Argument, Block, Expr, Expression}, + ast::{self, Argument, Block, Expr, Expression}, engine::Closure, }; @@ -106,7 +106,7 @@ pub fn get_pipeline_elements( fn get_arguments( engine_state: &EngineState, stack: &mut Stack, - call: &Call, + call: &ast::Call, eval_expression_fn: fn(&EngineState, &mut Stack, &Expression) -> Result, ) -> Vec { let mut arg_value = vec![]; diff --git a/crates/nu-command/src/debug/metadata.rs b/crates/nu-command/src/debug/metadata.rs index 543e598e28..245c150cea 100644 --- a/crates/nu-command/src/debug/metadata.rs +++ b/crates/nu-command/src/debug/metadata.rs @@ -28,6 +28,10 @@ impl Command for Metadata { .category(Category::Debug) } + fn requires_ast_for_arguments(&self) -> bool { + true + } + fn run( &self, engine_state: &EngineState, @@ -35,7 +39,7 @@ impl Command for 
Metadata { call: &Call, input: PipelineData, ) -> Result { - let arg = call.positional_nth(0); + let arg = call.positional_nth(stack, 0); let head = call.head; match arg { diff --git a/crates/nu-command/src/debug/mod.rs b/crates/nu-command/src/debug/mod.rs index f19ddab916..ec18c2be87 100644 --- a/crates/nu-command/src/debug/mod.rs +++ b/crates/nu-command/src/debug/mod.rs @@ -10,6 +10,7 @@ mod profile; mod timeit; mod view; mod view_files; +mod view_ir; mod view_source; mod view_span; @@ -25,5 +26,6 @@ pub use profile::DebugProfile; pub use timeit::TimeIt; pub use view::View; pub use view_files::ViewFiles; +pub use view_ir::ViewIr; pub use view_source::ViewSource; pub use view_span::ViewSpan; diff --git a/crates/nu-command/src/debug/timeit.rs b/crates/nu-command/src/debug/timeit.rs index a445679b81..7a48644a6d 100644 --- a/crates/nu-command/src/debug/timeit.rs +++ b/crates/nu-command/src/debug/timeit.rs @@ -32,6 +32,10 @@ impl Command for TimeIt { vec!["timing", "timer", "benchmark", "measure"] } + fn requires_ast_for_arguments(&self) -> bool { + true + } + fn run( &self, engine_state: &EngineState, @@ -39,13 +43,14 @@ impl Command for TimeIt { call: &Call, input: PipelineData, ) -> Result { - let command_to_run = call.positional_nth(0); + // reset outdest, so the command can write to stdout and stderr. + let stack = &mut stack.push_redirection(None, None); + + let command_to_run = call.positional_nth(stack, 0); // Get the start time after all other computation has been done. let start_time = Instant::now(); - // reset outdest, so the command can write to stdout and stderr. - let stack = &mut stack.push_redirection(None, None); if let Some(command_to_run) = command_to_run { if let Some(block_id) = command_to_run.as_block() { let eval_block = get_eval_block(engine_state); @@ -53,7 +58,8 @@ impl Command for TimeIt { eval_block(engine_state, stack, block, input)? 
} else { let eval_expression_with_input = get_eval_expression_with_input(engine_state); - eval_expression_with_input(engine_state, stack, command_to_run, input)?.0 + let expression = &command_to_run.clone(); + eval_expression_with_input(engine_state, stack, expression, input)?.0 } } else { PipelineData::empty() diff --git a/crates/nu-command/src/debug/view_ir.rs b/crates/nu-command/src/debug/view_ir.rs new file mode 100644 index 0000000000..df4f6cad6b --- /dev/null +++ b/crates/nu-command/src/debug/view_ir.rs @@ -0,0 +1,83 @@ +use nu_engine::command_prelude::*; +use nu_protocol::engine::Closure; + +#[derive(Clone)] +pub struct ViewIr; + +impl Command for ViewIr { + fn name(&self) -> &str { + "view ir" + } + + fn signature(&self) -> Signature { + Signature::new(self.name()) + .required( + "closure", + SyntaxShape::Closure(None), + "The closure to see compiled code for.", + ) + .switch( + "json", + "Dump the raw block data as JSON (unstable).", + Some('j'), + ) + .input_output_type(Type::Nothing, Type::String) + } + + fn usage(&self) -> &str { + "View the compiled IR code for a block of code." 
+ } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let closure: Closure = call.req(engine_state, stack, 0)?; + let json = call.has_flag(engine_state, stack, "json")?; + + let block = engine_state.get_block(closure.block_id); + let ir_block = block + .ir_block + .as_ref() + .ok_or_else(|| ShellError::GenericError { + error: "Can't view IR for this block".into(), + msg: "block is missing compiled representation".into(), + span: block.span, + help: Some("the IrBlock is probably missing due to a compilation error".into()), + inner: vec![], + })?; + + let formatted = if json { + let formatted_instructions = ir_block + .instructions + .iter() + .map(|instruction| { + instruction + .display(engine_state, &ir_block.data) + .to_string() + }) + .collect::>(); + + serde_json::to_string_pretty(&serde_json::json!({ + "block_id": closure.block_id, + "span": block.span, + "ir_block": ir_block, + "formatted_instructions": formatted_instructions, + })) + .map_err(|err| ShellError::GenericError { + error: "JSON serialization failed".into(), + msg: err.to_string(), + span: Some(call.head), + help: None, + inner: vec![], + })? 
+ } else { + format!("{}", ir_block.display(engine_state)) + }; + + Ok(Value::string(formatted, call.head).into_pipeline_data()) + } +} diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index 847d2349ed..b270a78dce 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -154,6 +154,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState { TimeIt, View, ViewFiles, + ViewIr, ViewSource, ViewSpan, }; diff --git a/crates/nu-command/src/env/export_env.rs b/crates/nu-command/src/env/export_env.rs index 00f2c73ef4..7b583c9959 100644 --- a/crates/nu-command/src/env/export_env.rs +++ b/crates/nu-command/src/env/export_env.rs @@ -33,6 +33,10 @@ impl Command for ExportEnv { CommandType::Keyword } + fn requires_ast_for_arguments(&self) -> bool { + true + } + fn run( &self, engine_state: &EngineState, @@ -41,7 +45,7 @@ impl Command for ExportEnv { input: PipelineData, ) -> Result { let block_id = call - .positional_nth(0) + .positional_nth(caller_stack, 0) .expect("checked through parser") .as_block() .expect("internal error: missing block"); diff --git a/crates/nu-command/src/env/source_env.rs b/crates/nu-command/src/env/source_env.rs index 0d8b118e8d..1813a92f1f 100644 --- a/crates/nu-command/src/env/source_env.rs +++ b/crates/nu-command/src/env/source_env.rs @@ -56,7 +56,7 @@ impl Command for SourceEnv { &source_filename.item, engine_state, caller_stack, - get_dirs_var_from_call(call), + get_dirs_var_from_call(caller_stack, call), )? 
{ PathBuf::from(&path) } else { diff --git a/crates/nu-command/src/filesystem/du.rs b/crates/nu-command/src/filesystem/du.rs index 93f08f7785..d34892ace9 100644 --- a/crates/nu-command/src/filesystem/du.rs +++ b/crates/nu-command/src/filesystem/du.rs @@ -102,7 +102,7 @@ impl Command for Du { let current_dir = current_dir(engine_state, stack)?; let paths = get_rest_for_glob_pattern(engine_state, stack, call, 0)?; - let paths = if call.rest_iter(0).count() == 0 { + let paths = if !call.has_positional_args(stack, 0) { None } else { Some(paths) diff --git a/crates/nu-command/src/filesystem/ls.rs b/crates/nu-command/src/filesystem/ls.rs index f465a93dc1..807e4f3409 100644 --- a/crates/nu-command/src/filesystem/ls.rs +++ b/crates/nu-command/src/filesystem/ls.rs @@ -108,7 +108,7 @@ impl Command for Ls { }; let pattern_arg = get_rest_for_glob_pattern(engine_state, stack, call, 0)?; - let input_pattern_arg = if call.rest_iter(0).count() == 0 { + let input_pattern_arg = if !call.has_positional_args(stack, 0) { None } else { Some(pattern_arg) diff --git a/crates/nu-command/src/filesystem/open.rs b/crates/nu-command/src/filesystem/open.rs index e654b27f05..0351d1d9b2 100644 --- a/crates/nu-command/src/filesystem/open.rs +++ b/crates/nu-command/src/filesystem/open.rs @@ -1,7 +1,7 @@ use super::util::get_rest_for_glob_pattern; #[allow(deprecated)] use nu_engine::{command_prelude::*, current_dir, get_eval_block}; -use nu_protocol::{ByteStream, DataSource, NuGlob, PipelineMetadata}; +use nu_protocol::{ast, ByteStream, DataSource, NuGlob, PipelineMetadata}; use std::path::Path; #[cfg(feature = "sqlite")] @@ -56,7 +56,7 @@ impl Command for Open { let mut paths = get_rest_for_glob_pattern(engine_state, stack, call, 0)?; let eval_block = get_eval_block(engine_state); - if paths.is_empty() && call.rest_iter(0).next().is_none() { + if paths.is_empty() && !call.has_positional_args(stack, 0) { // try to use path from pipeline input if there were no positional or spread args let (filename, 
span) = match input { PipelineData::Value(val, ..) => { @@ -180,7 +180,8 @@ impl Command for Open { let block = engine_state.get_block(block_id); eval_block(engine_state, stack, block, stream) } else { - decl.run(engine_state, stack, &Call::new(call_span), stream) + let call = ast::Call::new(call_span); + decl.run(engine_state, stack, &(&call).into(), stream) }; output.push(command_output.map_err(|inner| { ShellError::GenericError{ diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index 6ca5c09559..be5073ef20 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -4,10 +4,8 @@ use nu_engine::get_eval_block; use nu_engine::{command_prelude::*, current_dir}; use nu_path::expand_path_with; use nu_protocol::{ - ast::{Expr, Expression}, - byte_stream::copy_with_signals, - process::ChildPipe, - ByteStreamSource, DataSource, OutDest, PipelineMetadata, Signals, + ast, byte_stream::copy_with_signals, process::ChildPipe, ByteStreamSource, DataSource, OutDest, + PipelineMetadata, Signals, }; use std::{ fs::File, @@ -69,24 +67,6 @@ impl Command for Save { let append = call.has_flag(engine_state, stack, "append")?; let force = call.has_flag(engine_state, stack, "force")?; let progress = call.has_flag(engine_state, stack, "progress")?; - let out_append = if let Some(Expression { - expr: Expr::Bool(out_append), - .. - }) = call.get_parser_info("out-append") - { - *out_append - } else { - false - }; - let err_append = if let Some(Expression { - expr: Expr::Bool(err_append), - .. 
- }) = call.get_parser_info("err-append") - { - *err_append - } else { - false - }; let span = call.head; #[allow(deprecated)] @@ -109,14 +89,7 @@ impl Command for Save { PipelineData::ByteStream(stream, metadata) => { check_saving_to_source_file(metadata.as_ref(), &path, stderr_path.as_ref())?; - let (file, stderr_file) = get_files( - &path, - stderr_path.as_ref(), - append, - out_append, - err_append, - force, - )?; + let (file, stderr_file) = get_files(&path, stderr_path.as_ref(), append, force)?; let size = stream.known_size(); let signals = engine_state.signals(); @@ -221,14 +194,7 @@ impl Command for Save { stderr_path.as_ref(), )?; - let (mut file, _) = get_files( - &path, - stderr_path.as_ref(), - append, - out_append, - err_append, - force, - )?; + let (mut file, _) = get_files(&path, stderr_path.as_ref(), append, force)?; for val in ls { file.write_all(&value_to_bytes(val)?) .map_err(|err| ShellError::IOError { @@ -258,14 +224,7 @@ impl Command for Save { input_to_bytes(input, Path::new(&path.item), raw, engine_state, stack, span)?; // Only open file after successful conversion - let (mut file, _) = get_files( - &path, - stderr_path.as_ref(), - append, - out_append, - err_append, - force, - )?; + let (mut file, _) = get_files(&path, stderr_path.as_ref(), append, force)?; file.write_all(&bytes).map_err(|err| ShellError::IOError { msg: err.to_string(), @@ -397,7 +356,8 @@ fn convert_to_extension( let eval_block = get_eval_block(engine_state); eval_block(engine_state, stack, block, input) } else { - decl.run(engine_state, stack, &Call::new(span), input) + let call = ast::Call::new(span); + decl.run(engine_state, stack, &(&call).into(), input) } } else { Ok(input) @@ -473,19 +433,17 @@ fn get_files( path: &Spanned, stderr_path: Option<&Spanned>, append: bool, - out_append: bool, - err_append: bool, force: bool, ) -> Result<(File, Option), ShellError> { // First check both paths - let (path, path_span) = prepare_path(path, append || out_append, force)?; + let 
(path, path_span) = prepare_path(path, append, force)?; let stderr_path_and_span = stderr_path .as_ref() - .map(|stderr_path| prepare_path(stderr_path, append || err_append, force)) + .map(|stderr_path| prepare_path(stderr_path, append, force)) .transpose()?; // Only if both files can be used open and possibly truncate them - let file = open_file(path, path_span, append || out_append)?; + let file = open_file(path, path_span, append)?; let stderr_file = stderr_path_and_span .map(|(stderr_path, stderr_path_span)| { @@ -498,7 +456,7 @@ fn get_files( inner: vec![], }) } else { - open_file(stderr_path, stderr_path_span, append || err_append) + open_file(stderr_path, stderr_path_span, append) } }) .transpose()?; diff --git a/crates/nu-command/src/filesystem/util.rs b/crates/nu-command/src/filesystem/util.rs index 1b755875bd..de32d204a0 100644 --- a/crates/nu-command/src/filesystem/util.rs +++ b/crates/nu-command/src/filesystem/util.rs @@ -1,6 +1,6 @@ use dialoguer::Input; use nu_engine::{command_prelude::*, get_eval_expression}; -use nu_protocol::{ast::Expr, FromValue, NuGlob}; +use nu_protocol::{FromValue, NuGlob}; use std::{ error::Error, path::{Path, PathBuf}, @@ -92,42 +92,19 @@ pub fn is_older(src: &Path, dst: &Path) -> Option { /// Get rest arguments from given `call`, starts with `starting_pos`. /// -/// It's similar to `call.rest`, except that it always returns NuGlob. And if input argument has -/// Type::Glob, the NuGlob is unquoted, which means it's required to expand. +/// It's similar to `call.rest`, except that it always returns NuGlob. 
pub fn get_rest_for_glob_pattern( engine_state: &EngineState, stack: &mut Stack, call: &Call, starting_pos: usize, ) -> Result>, ShellError> { - let mut output = vec![]; let eval_expression = get_eval_expression(engine_state); - for result in call.rest_iter_flattened(starting_pos, |expr| { - let result = eval_expression(engine_state, stack, expr); - match result { - Err(e) => Err(e), - Ok(result) => { - let span = result.span(); - // convert from string to quoted string if expr is a variable - // or string interpolation - match result { - Value::String { val, .. } - if matches!( - &expr.expr, - Expr::FullCellPath(_) | Expr::StringInterpolation(_) - ) => - { - // should not expand if given input type is not glob. - Ok(Value::glob(val, expr.ty != Type::Glob, span)) - } - other => Ok(other), - } - } - } - })? { - output.push(FromValue::from_value(result)?); - } - - Ok(output) + call.rest_iter_flattened(engine_state, stack, eval_expression, starting_pos)? + .into_iter() + // This used to be much more complex, but I think `FromValue` should be able to handle the + // nuance here. 
+ .map(FromValue::from_value) + .collect() } diff --git a/crates/nu-command/src/filters/transpose.rs b/crates/nu-command/src/filters/transpose.rs index 95aa382e4f..4a14c1aea2 100644 --- a/crates/nu-command/src/filters/transpose.rs +++ b/crates/nu-command/src/filters/transpose.rs @@ -149,27 +149,27 @@ pub fn transpose( if !args.rest.is_empty() && args.header_row { return Err(ShellError::IncompatibleParametersSingle { msg: "Can not provide header names and use `--header-row`".into(), - span: call.get_named_arg("header-row").expect("has flag").span, + span: call.get_flag_span(stack, "header-row").expect("has flag"), }); } if !args.header_row && args.keep_all { return Err(ShellError::IncompatibleParametersSingle { msg: "Can only be used with `--header-row`(`-r`)".into(), - span: call.get_named_arg("keep-all").expect("has flag").span, + span: call.get_flag_span(stack, "keep-all").expect("has flag"), }); } if !args.header_row && args.keep_last { return Err(ShellError::IncompatibleParametersSingle { msg: "Can only be used with `--header-row`(`-r`)".into(), - span: call.get_named_arg("keep-last").expect("has flag").span, + span: call.get_flag_span(stack, "keep-last").expect("has flag"), }); } if args.keep_all && args.keep_last { return Err(ShellError::IncompatibleParameters { left_message: "can't use `--keep-last` at the same time".into(), - left_span: call.get_named_arg("keep-last").expect("has flag").span, + left_span: call.get_flag_span(stack, "keep-last").expect("has flag"), right_message: "because of `--keep-all`".into(), - right_span: call.get_named_arg("keep-all").expect("has flag").span, + right_span: call.get_flag_span(stack, "keep-all").expect("has flag"), }); } diff --git a/crates/nu-command/src/filters/utils.rs b/crates/nu-command/src/filters/utils.rs index 3ebd4bafbd..4c67667e8e 100644 --- a/crates/nu-command/src/filters/utils.rs +++ b/crates/nu-command/src/filters/utils.rs @@ -1,7 +1,6 @@ use nu_engine::{CallExt, ClosureEval}; use nu_protocol::{ - ast::Call, 
- engine::{Closure, EngineState, Stack}, + engine::{Call, Closure, EngineState, Stack}, IntoPipelineData, PipelineData, ShellError, Span, Value, }; diff --git a/crates/nu-command/src/generators/cal.rs b/crates/nu-command/src/generators/cal.rs index a257f3ab7c..018d9370dd 100644 --- a/crates/nu-command/src/generators/cal.rs +++ b/crates/nu-command/src/generators/cal.rs @@ -1,7 +1,7 @@ use chrono::{Datelike, Local, NaiveDate}; use nu_color_config::StyleComputer; use nu_engine::command_prelude::*; -use nu_protocol::ast::{Expr, Expression}; +use nu_protocol::ast::{self, Expr, Expression}; use std::collections::VecDeque; @@ -143,7 +143,7 @@ pub fn cal( style_computer, )?; - let mut table_no_index = Call::new(Span::unknown()); + let mut table_no_index = ast::Call::new(Span::unknown()); table_no_index.add_named(( Spanned { item: "index".to_string(), @@ -160,7 +160,12 @@ pub fn cal( let cal_table_output = Value::list(calendar_vec_deque.into_iter().collect(), tag).into_pipeline_data(); if !arguments.as_table { - crate::Table.run(engine_state, stack, &table_no_index, cal_table_output) + crate::Table.run( + engine_state, + stack, + &(&table_no_index).into(), + cal_table_output, + ) } else { Ok(cal_table_output) } diff --git a/crates/nu-command/src/math/utils.rs b/crates/nu-command/src/math/utils.rs index 765c1f42fb..62f96ea073 100644 --- a/crates/nu-command/src/math/utils.rs +++ b/crates/nu-command/src/math/utils.rs @@ -1,6 +1,6 @@ use core::slice; use indexmap::IndexMap; -use nu_protocol::{ast::Call, IntoPipelineData, PipelineData, ShellError, Signals, Span, Value}; +use nu_protocol::{engine::Call, IntoPipelineData, PipelineData, ShellError, Signals, Span, Value}; pub fn run_with_function( call: &Call, diff --git a/crates/nu-command/src/platform/ansi/ansi_.rs b/crates/nu-command/src/platform/ansi/ansi_.rs index 29603be9e7..23161eb7bf 100644 --- a/crates/nu-command/src/platform/ansi/ansi_.rs +++ b/crates/nu-command/src/platform/ansi/ansi_.rs @@ -676,7 +676,7 @@ Operating 
system commands: } }; - let output = heavy_lifting(code, escape, osc, call)?; + let output = heavy_lifting(code, escape, osc, stack, call)?; Ok(Value::string(output, call.head).into_pipeline_data()) } @@ -713,26 +713,30 @@ Operating system commands: } }; - let output = heavy_lifting(code, escape, osc, call)?; + let output = heavy_lifting(code, escape, osc, &Stack::new(), call)?; Ok(Value::string(output, call.head).into_pipeline_data()) } } -fn heavy_lifting(code: Value, escape: bool, osc: bool, call: &Call) -> Result { +fn heavy_lifting( + code: Value, + escape: bool, + osc: bool, + stack: &Stack, + call: &Call, +) -> Result { let param_is_string = matches!(code, Value::String { .. }); if escape && osc { return Err(ShellError::IncompatibleParameters { left_message: "escape".into(), left_span: call - .get_named_arg("escape") - .expect("Unexpected missing argument") - .span, + .get_flag_span(stack, "escape") + .expect("Unexpected missing argument"), right_message: "osc".into(), right_span: call - .get_named_arg("osc") - .expect("Unexpected missing argument") - .span, + .get_flag_span(stack, "osc") + .expect("Unexpected missing argument"), }); } let code_string = if param_is_string { @@ -744,10 +748,7 @@ fn heavy_lifting(code: Value, escape: bool, osc: bool, call: &Call) -> Result = code_string.chars().collect(); if code_vec[0] == '\\' { - let span = match call.get_flag_expr("escape") { - Some(expr) => expr.span, - None => call.head, - }; + let span = call.get_flag_span(stack, "escape").unwrap_or(call.head); return Err(ShellError::TypeMismatch { err_message: "no need for escape characters".into(), diff --git a/crates/nu-command/src/platform/is_terminal.rs b/crates/nu-command/src/platform/is_terminal.rs index c67329e839..2195f3ff8a 100644 --- a/crates/nu-command/src/platform/is_terminal.rs +++ b/crates/nu-command/src/platform/is_terminal.rs @@ -58,7 +58,7 @@ impl Command for IsTerminal { _ => { return Err(ShellError::IncompatibleParametersSingle { msg: "Only one stream 
may be checked".into(), - span: Span::merge_many(call.arguments.iter().map(|arg| arg.span())), + span: call.arguments_span(), }); } }; diff --git a/crates/nu-command/src/platform/kill.rs b/crates/nu-command/src/platform/kill.rs index 2e47ee8c78..1cf6f15f01 100644 --- a/crates/nu-command/src/platform/kill.rs +++ b/crates/nu-command/src/platform/kill.rs @@ -84,27 +84,26 @@ impl Command for Kill { { return Err(ShellError::IncompatibleParameters { left_message: "force".to_string(), - left_span: call - .get_named_arg("force") - .ok_or_else(|| ShellError::GenericError { + left_span: call.get_flag_span(stack, "force").ok_or_else(|| { + ShellError::GenericError { error: "Flag error".into(), msg: "flag force not found".into(), span: Some(call.head), help: None, inner: vec![], - })? - .span, + } + })?, right_message: "signal".to_string(), right_span: Span::merge( - call.get_named_arg("signal") - .ok_or_else(|| ShellError::GenericError { + call.get_flag_span(stack, "signal").ok_or_else(|| { + ShellError::GenericError { error: "Flag error".into(), msg: "flag signal not found".into(), span: Some(call.head), help: None, inner: vec![], - })? 
- .span, + } + })?, signal_span, ), }); diff --git a/crates/nu-command/src/strings/encode_decode/base64.rs b/crates/nu-command/src/strings/encode_decode/base64.rs index afc143983e..dd9289a141 100644 --- a/crates/nu-command/src/strings/encode_decode/base64.rs +++ b/crates/nu-command/src/strings/encode_decode/base64.rs @@ -8,8 +8,8 @@ use base64::{ }; use nu_cmd_base::input_handler::{operate as general_operate, CmdArgument}; use nu_protocol::{ - ast::{Call, CellPath}, - engine::EngineState, + ast::CellPath, + engine::{Call, EngineState}, PipelineData, ShellError, Span, Spanned, Value, }; diff --git a/crates/nu-command/src/strings/mod.rs b/crates/nu-command/src/strings/mod.rs index d1ebf540e5..8b5af2dec4 100644 --- a/crates/nu-command/src/strings/mod.rs +++ b/crates/nu-command/src/strings/mod.rs @@ -17,8 +17,7 @@ pub use str_::*; use nu_engine::CallExt; use nu_protocol::{ - ast::Call, - engine::{EngineState, Stack, StateWorkingSet}, + engine::{Call, EngineState, Stack, StateWorkingSet}, ShellError, }; diff --git a/crates/nu-command/src/system/nu_check.rs b/crates/nu-command/src/system/nu_check.rs index f9e0879c00..334569c79e 100644 --- a/crates/nu-command/src/system/nu_check.rs +++ b/crates/nu-command/src/system/nu_check.rs @@ -87,7 +87,7 @@ impl Command for NuCheck { &path_str.item, engine_state, stack, - get_dirs_var_from_call(call), + get_dirs_var_from_call(stack, call), ) { Ok(path) => { if let Some(path) = path { diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index 06bc5a69ca..a5cf343970 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -1,9 +1,7 @@ use nu_cmd_base::hook::eval_hook; use nu_engine::{command_prelude::*, env_to_strings, get_eval_expression}; use nu_path::{dots::expand_ndots, expand_tilde}; -use nu_protocol::{ - ast::Expression, did_you_mean, process::ChildProcess, ByteStream, NuGlob, OutDest, Signals, -}; +use 
nu_protocol::{did_you_mean, process::ChildProcess, ByteStream, NuGlob, OutDest, Signals}; use nu_system::ForegroundChild; use nu_utils::IgnoreCaseExt; use pathdiff::diff_paths; @@ -222,20 +220,21 @@ pub fn eval_arguments_from_call( call: &Call, ) -> Result>, ShellError> { let cwd = engine_state.cwd(Some(stack))?; - let mut args: Vec> = vec![]; - for (expr, spread) in call.rest_iter(1) { - for arg in eval_argument(engine_state, stack, expr, spread)? { - match arg { - // Expand globs passed to run-external - Value::Glob { val, no_expand, .. } if !no_expand => args.extend( - expand_glob(&val, cwd.as_ref(), expr.span, engine_state.signals())? - .into_iter() - .map(|s| s.into_spanned(expr.span)), - ), - other => { - args.push(OsString::from(coerce_into_string(other)?).into_spanned(expr.span)) - } - } + let eval_expression = get_eval_expression(engine_state); + let call_args = call.rest_iter_flattened(engine_state, stack, eval_expression, 1)?; + let mut args: Vec> = Vec::with_capacity(call_args.len()); + + for arg in call_args { + let span = arg.span(); + match arg { + // Expand globs passed to run-external + Value::Glob { val, no_expand, .. } if !no_expand => args.extend( + expand_glob(&val, cwd.as_std_path(), span, engine_state.signals())? + .into_iter() + .map(|s| s.into_spanned(span)), + ), + other => args + .push(OsString::from(coerce_into_string(engine_state, other)?).into_spanned(span)), } } Ok(args) @@ -243,42 +242,17 @@ pub fn eval_arguments_from_call( /// Custom `coerce_into_string()`, including globs, since those are often args to `run-external` /// as well -fn coerce_into_string(val: Value) -> Result { +fn coerce_into_string(engine_state: &EngineState, val: Value) -> Result { match val { + Value::List { .. } => Err(ShellError::CannotPassListToExternal { + arg: String::from_utf8_lossy(engine_state.get_span_contents(val.span())).into_owned(), + span: val.span(), + }), Value::Glob { val, .. 
} => Ok(val), _ => val.coerce_into_string(), } } -/// Evaluate an argument, returning more than one value if it was a list to be spread. -fn eval_argument( - engine_state: &EngineState, - stack: &mut Stack, - expr: &Expression, - spread: bool, -) -> Result, ShellError> { - let eval = get_eval_expression(engine_state); - match eval(engine_state, stack, expr)? { - Value::List { vals, .. } => { - if spread { - Ok(vals) - } else { - Err(ShellError::CannotPassListToExternal { - arg: String::from_utf8_lossy(engine_state.get_span_contents(expr.span)).into(), - span: expr.span, - }) - } - } - value => { - if spread { - Err(ShellError::CannotSpreadAsList { span: expr.span }) - } else { - Ok(vec![value]) - } - } - } -} - /// Performs glob expansion on `arg`. If the expansion found no matches or the pattern /// is not a valid glob, then this returns the original string as the expansion result. /// diff --git a/crates/nu-command/src/system/uname.rs b/crates/nu-command/src/system/uname.rs index e267fcaeb2..0bcb749f02 100644 --- a/crates/nu-command/src/system/uname.rs +++ b/crates/nu-command/src/system/uname.rs @@ -1,10 +1,5 @@ -use nu_protocol::record; -use nu_protocol::Value; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Type, -}; +use nu_engine::command_prelude::*; +use nu_protocol::{record, Value}; #[derive(Clone)] pub struct UName; diff --git a/crates/nu-command/src/viewers/table.rs b/crates/nu-command/src/viewers/table.rs index e3738a3952..190c3659af 100644 --- a/crates/nu-command/src/viewers/table.rs +++ b/crates/nu-command/src/viewers/table.rs @@ -344,7 +344,7 @@ fn get_theme_flag( struct CmdInput<'a> { engine_state: &'a EngineState, stack: &'a mut Stack, - call: &'a Call, + call: &'a Call<'a>, data: PipelineData, } @@ -352,7 +352,7 @@ impl<'a> CmdInput<'a> { fn new( engine_state: &'a EngineState, stack: &'a mut Stack, - call: &'a Call, + call: &'a Call<'a>, data: PipelineData, ) -> 
Self { Self { diff --git a/crates/nu-engine/Cargo.toml b/crates/nu-engine/Cargo.toml index 3e6c3f787b..c416a2f590 100644 --- a/crates/nu-engine/Cargo.toml +++ b/crates/nu-engine/Cargo.toml @@ -15,6 +15,7 @@ nu-protocol = { path = "../nu-protocol", features = ["plugin"], version = "0.95. nu-path = { path = "../nu-path", version = "0.95.1" } nu-glob = { path = "../nu-glob", version = "0.95.1" } nu-utils = { path = "../nu-utils", version = "0.95.1" } +log = { workspace = true } [features] -plugin = [] \ No newline at end of file +plugin = [] diff --git a/crates/nu-engine/src/call_ext.rs b/crates/nu-engine/src/call_ext.rs index daedb24a1f..d3f36215e6 100644 --- a/crates/nu-engine/src/call_ext.rs +++ b/crates/nu-engine/src/call_ext.rs @@ -1,10 +1,10 @@ use crate::eval_expression; use nu_protocol::{ - ast::Call, + ast, debugger::WithoutDebug, - engine::{EngineState, Stack, StateWorkingSet}, + engine::{self, EngineState, Stack, StateWorkingSet}, eval_const::eval_constant, - FromValue, ShellError, Value, + ir, FromValue, ShellError, Span, Value, }; pub trait CallExt { @@ -23,6 +23,9 @@ pub trait CallExt { name: &str, ) -> Result, ShellError>; + /// Efficiently get the span of a flag argument + fn get_flag_span(&self, stack: &Stack, name: &str) -> Option; + fn rest( &self, engine_state: &EngineState, @@ -56,9 +59,12 @@ pub trait CallExt { stack: &mut Stack, name: &str, ) -> Result; + + /// True if the command has any positional or rest arguments, excluding before the given index. 
+ fn has_positional_args(&self, stack: &Stack, starting_pos: usize) -> bool; } -impl CallExt for Call { +impl CallExt for ast::Call { fn has_flag( &self, engine_state: &EngineState, @@ -104,6 +110,10 @@ impl CallExt for Call { } } + fn get_flag_span(&self, _stack: &Stack, name: &str) -> Option { + self.get_named_arg(name).map(|arg| arg.span) + } + fn rest( &self, engine_state: &EngineState, @@ -189,4 +199,205 @@ impl CallExt for Call { }) } } + + fn has_positional_args(&self, _stack: &Stack, starting_pos: usize) -> bool { + self.rest_iter(starting_pos).next().is_some() + } +} + +impl CallExt for ir::Call { + fn has_flag( + &self, + _engine_state: &EngineState, + stack: &mut Stack, + flag_name: &str, + ) -> Result { + Ok(self + .named_iter(stack) + .find(|(name, _)| name.item == flag_name) + .is_some_and(|(_, value)| { + // Handle --flag=false + !matches!(value, Some(Value::Bool { val: false, .. })) + })) + } + + fn get_flag( + &self, + _engine_state: &EngineState, + stack: &mut Stack, + name: &str, + ) -> Result, ShellError> { + if let Some(val) = self.get_named_arg(stack, name) { + T::from_value(val.clone()).map(Some) + } else { + Ok(None) + } + } + + fn get_flag_span(&self, stack: &Stack, name: &str) -> Option { + self.named_iter(stack) + .find_map(|(i_name, _)| (i_name.item == name).then_some(i_name.span)) + } + + fn rest( + &self, + _engine_state: &EngineState, + stack: &mut Stack, + starting_pos: usize, + ) -> Result, ShellError> { + self.rest_iter_flattened(stack, starting_pos)? 
+ .into_iter() + .map(T::from_value) + .collect() + } + + fn opt( + &self, + _engine_state: &EngineState, + stack: &mut Stack, + pos: usize, + ) -> Result, ShellError> { + self.positional_iter(stack) + .nth(pos) + .cloned() + .map(T::from_value) + .transpose() + } + + fn opt_const( + &self, + _working_set: &StateWorkingSet, + _pos: usize, + ) -> Result, ShellError> { + Err(ShellError::IrEvalError { + msg: "const evaluation is not yet implemented on ir::Call".into(), + span: Some(self.head), + }) + } + + fn req( + &self, + engine_state: &EngineState, + stack: &mut Stack, + pos: usize, + ) -> Result { + if let Some(val) = self.opt(engine_state, stack, pos)? { + Ok(val) + } else if self.positional_len(stack) == 0 { + Err(ShellError::AccessEmptyContent { span: self.head }) + } else { + Err(ShellError::AccessBeyondEnd { + max_idx: self.positional_len(stack) - 1, + span: self.head, + }) + } + } + + fn req_parser_info( + &self, + engine_state: &EngineState, + stack: &mut Stack, + name: &str, + ) -> Result { + // FIXME: this depends on the AST evaluator. We can fix this by making the parser info an + // enum rather than using expressions. It's not clear that evaluation of this is ever really + // needed. + if let Some(expr) = self.get_parser_info(stack, name) { + let expr = expr.clone(); + let stack = &mut stack.use_call_arg_out_dest(); + let result = eval_expression::(engine_state, stack, &expr)?; + FromValue::from_value(result) + } else { + Err(ShellError::CantFindColumn { + col_name: name.into(), + span: None, + src_span: self.head, + }) + } + } + + fn has_positional_args(&self, stack: &Stack, starting_pos: usize) -> bool { + self.rest_iter(stack, starting_pos).next().is_some() + } +} + +macro_rules! proxy { + ($self:ident . 
$method:ident ($($param:expr),*)) => (match &$self.inner { + engine::CallImpl::AstRef(call) => call.$method($($param),*), + engine::CallImpl::AstBox(call) => call.$method($($param),*), + engine::CallImpl::IrRef(call) => call.$method($($param),*), + engine::CallImpl::IrBox(call) => call.$method($($param),*), + }) +} + +impl CallExt for engine::Call<'_> { + fn has_flag( + &self, + engine_state: &EngineState, + stack: &mut Stack, + flag_name: &str, + ) -> Result { + proxy!(self.has_flag(engine_state, stack, flag_name)) + } + + fn get_flag( + &self, + engine_state: &EngineState, + stack: &mut Stack, + name: &str, + ) -> Result, ShellError> { + proxy!(self.get_flag(engine_state, stack, name)) + } + + fn get_flag_span(&self, stack: &Stack, name: &str) -> Option { + proxy!(self.get_flag_span(stack, name)) + } + + fn rest( + &self, + engine_state: &EngineState, + stack: &mut Stack, + starting_pos: usize, + ) -> Result, ShellError> { + proxy!(self.rest(engine_state, stack, starting_pos)) + } + + fn opt( + &self, + engine_state: &EngineState, + stack: &mut Stack, + pos: usize, + ) -> Result, ShellError> { + proxy!(self.opt(engine_state, stack, pos)) + } + + fn opt_const( + &self, + working_set: &StateWorkingSet, + pos: usize, + ) -> Result, ShellError> { + proxy!(self.opt_const(working_set, pos)) + } + + fn req( + &self, + engine_state: &EngineState, + stack: &mut Stack, + pos: usize, + ) -> Result { + proxy!(self.req(engine_state, stack, pos)) + } + + fn req_parser_info( + &self, + engine_state: &EngineState, + stack: &mut Stack, + name: &str, + ) -> Result { + proxy!(self.req_parser_info(engine_state, stack, name)) + } + + fn has_positional_args(&self, stack: &Stack, starting_pos: usize) -> bool { + proxy!(self.has_positional_args(stack, starting_pos)) + } } diff --git a/crates/nu-engine/src/command_prelude.rs b/crates/nu-engine/src/command_prelude.rs index 5c21af27e0..e6ddb5fb91 100644 --- a/crates/nu-engine/src/command_prelude.rs +++ 
b/crates/nu-engine/src/command_prelude.rs @@ -1,7 +1,7 @@ pub use crate::CallExt; pub use nu_protocol::{ - ast::{Call, CellPath}, - engine::{Command, EngineState, Stack, StateWorkingSet}, + ast::CellPath, + engine::{Call, Command, EngineState, Stack, StateWorkingSet}, record, ByteStream, ByteStreamType, Category, ErrSpan, Example, IntoInterruptiblePipelineData, IntoPipelineData, IntoSpanned, PipelineData, Record, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, diff --git a/crates/nu-engine/src/compile/builder.rs b/crates/nu-engine/src/compile/builder.rs new file mode 100644 index 0000000000..d77075cc3f --- /dev/null +++ b/crates/nu-engine/src/compile/builder.rs @@ -0,0 +1,575 @@ +use nu_protocol::{ + ir::{DataSlice, Instruction, IrAstRef, IrBlock, Literal}, + CompileError, IntoSpanned, RegId, Span, Spanned, +}; + +/// A label identifier. Only exists while building code. Replaced with the actual target. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct LabelId(pub usize); + +/// Builds [`IrBlock`]s progressively by consuming instructions and handles register allocation. +#[derive(Debug)] +pub(crate) struct BlockBuilder { + pub(crate) block_span: Option, + pub(crate) instructions: Vec, + pub(crate) spans: Vec, + /// The actual instruction index that a label refers to. While building IR, branch targets are + /// specified as indices into this array rather than the true instruction index. This makes it + /// easier to make modifications to code, as just this array needs to be changed, and it's also + /// less error prone as during `finish()` we check to make sure all of the used labels have had + /// an index actually set. + pub(crate) labels: Vec>, + pub(crate) data: Vec, + pub(crate) ast: Vec>, + pub(crate) comments: Vec, + pub(crate) register_allocation_state: Vec, + pub(crate) file_count: u32, + pub(crate) loop_stack: Vec, +} + +impl BlockBuilder { + /// Starts a new block, with the first register (`%0`) allocated as input. 
+ pub(crate) fn new(block_span: Option) -> Self { + BlockBuilder { + block_span, + instructions: vec![], + spans: vec![], + labels: vec![], + data: vec![], + ast: vec![], + comments: vec![], + register_allocation_state: vec![true], + file_count: 0, + loop_stack: vec![], + } + } + + /// Get the next unused register for code generation. + pub(crate) fn next_register(&mut self) -> Result { + if let Some(index) = self + .register_allocation_state + .iter_mut() + .position(|is_allocated| { + if !*is_allocated { + *is_allocated = true; + true + } else { + false + } + }) + { + Ok(RegId(index as u32)) + } else if self.register_allocation_state.len() < (u32::MAX as usize - 2) { + let reg_id = RegId(self.register_allocation_state.len() as u32); + self.register_allocation_state.push(true); + Ok(reg_id) + } else { + Err(CompileError::RegisterOverflow { + block_span: self.block_span, + }) + } + } + + /// Check if a register is initialized with a value. + pub(crate) fn is_allocated(&self, reg_id: RegId) -> bool { + self.register_allocation_state + .get(reg_id.0 as usize) + .is_some_and(|state| *state) + } + + /// Mark a register as initialized. + pub(crate) fn mark_register(&mut self, reg_id: RegId) -> Result<(), CompileError> { + if let Some(is_allocated) = self.register_allocation_state.get_mut(reg_id.0 as usize) { + *is_allocated = true; + Ok(()) + } else { + Err(CompileError::RegisterOverflow { + block_span: self.block_span, + }) + } + } + + /// Mark a register as empty, so that it can be used again by something else. 
+ #[track_caller] + pub(crate) fn free_register(&mut self, reg_id: RegId) -> Result<(), CompileError> { + let index = reg_id.0 as usize; + + if self + .register_allocation_state + .get(index) + .is_some_and(|is_allocated| *is_allocated) + { + self.register_allocation_state[index] = false; + Ok(()) + } else { + log::warn!("register {reg_id} uninitialized, builder = {self:#?}"); + Err(CompileError::RegisterUninitialized { + reg_id, + caller: std::panic::Location::caller().to_string(), + }) + } + } + + /// Define a label, which can be used by branch instructions. The target can optionally be + /// specified now. + pub(crate) fn label(&mut self, target_index: Option) -> LabelId { + let label_id = self.labels.len(); + self.labels.push(target_index); + LabelId(label_id) + } + + /// Change the target of a label. + pub(crate) fn set_label( + &mut self, + label_id: LabelId, + target_index: usize, + ) -> Result<(), CompileError> { + *self + .labels + .get_mut(label_id.0) + .ok_or(CompileError::UndefinedLabel { + label_id: label_id.0, + span: None, + })? = Some(target_index); + Ok(()) + } + + /// Insert an instruction into the block, automatically marking any registers populated by + /// the instruction, and freeing any registers consumed by the instruction. + #[track_caller] + pub(crate) fn push(&mut self, instruction: Spanned) -> Result<(), CompileError> { + // Free read registers, and mark write registers. + // + // If a register is both read and written, it should be on both sides, so that we can verify + // that the register was in the right state beforehand. 
+ let mut allocate = |read: &[RegId], write: &[RegId]| -> Result<(), CompileError> { + for reg in read { + self.free_register(*reg)?; + } + for reg in write { + self.mark_register(*reg)?; + } + Ok(()) + }; + + let allocate_result = match &instruction.item { + Instruction::Unreachable => Ok(()), + Instruction::LoadLiteral { dst, lit } => { + allocate(&[], &[*dst]).and( + // Free any registers on the literal + match lit { + Literal::Range { + start, + step, + end, + inclusion: _, + } => allocate(&[*start, *step, *end], &[]), + Literal::Bool(_) + | Literal::Int(_) + | Literal::Float(_) + | Literal::Filesize(_) + | Literal::Duration(_) + | Literal::Binary(_) + | Literal::Block(_) + | Literal::Closure(_) + | Literal::RowCondition(_) + | Literal::List { capacity: _ } + | Literal::Record { capacity: _ } + | Literal::Filepath { + val: _, + no_expand: _, + } + | Literal::Directory { + val: _, + no_expand: _, + } + | Literal::GlobPattern { + val: _, + no_expand: _, + } + | Literal::String(_) + | Literal::RawString(_) + | Literal::CellPath(_) + | Literal::Date(_) + | Literal::Nothing => Ok(()), + }, + ) + } + Instruction::LoadValue { dst, val: _ } => allocate(&[], &[*dst]), + Instruction::Move { dst, src } => allocate(&[*src], &[*dst]), + Instruction::Clone { dst, src } => allocate(&[*src], &[*dst, *src]), + Instruction::Collect { src_dst } => allocate(&[*src_dst], &[*src_dst]), + Instruction::Span { src_dst } => allocate(&[*src_dst], &[*src_dst]), + Instruction::Drop { src } => allocate(&[*src], &[]), + Instruction::Drain { src } => allocate(&[*src], &[]), + Instruction::LoadVariable { dst, var_id: _ } => allocate(&[], &[*dst]), + Instruction::StoreVariable { var_id: _, src } => allocate(&[*src], &[]), + Instruction::LoadEnv { dst, key: _ } => allocate(&[], &[*dst]), + Instruction::LoadEnvOpt { dst, key: _ } => allocate(&[], &[*dst]), + Instruction::StoreEnv { key: _, src } => allocate(&[*src], &[]), + Instruction::PushPositional { src } => allocate(&[*src], &[]), + 
Instruction::AppendRest { src } => allocate(&[*src], &[]), + Instruction::PushFlag { name: _ } => Ok(()), + Instruction::PushShortFlag { short: _ } => Ok(()), + Instruction::PushNamed { name: _, src } => allocate(&[*src], &[]), + Instruction::PushShortNamed { short: _, src } => allocate(&[*src], &[]), + Instruction::PushParserInfo { name: _, info: _ } => Ok(()), + Instruction::RedirectOut { mode: _ } => Ok(()), + Instruction::RedirectErr { mode: _ } => Ok(()), + Instruction::CheckErrRedirected { src } => allocate(&[*src], &[*src]), + Instruction::OpenFile { + file_num: _, + path, + append: _, + } => allocate(&[*path], &[]), + Instruction::WriteFile { file_num: _, src } => allocate(&[*src], &[]), + Instruction::CloseFile { file_num: _ } => Ok(()), + Instruction::Call { + decl_id: _, + src_dst, + } => allocate(&[*src_dst], &[*src_dst]), + Instruction::StringAppend { src_dst, val } => allocate(&[*src_dst, *val], &[*src_dst]), + Instruction::GlobFrom { + src_dst, + no_expand: _, + } => allocate(&[*src_dst], &[*src_dst]), + Instruction::ListPush { src_dst, item } => allocate(&[*src_dst, *item], &[*src_dst]), + Instruction::ListSpread { src_dst, items } => { + allocate(&[*src_dst, *items], &[*src_dst]) + } + Instruction::RecordInsert { src_dst, key, val } => { + allocate(&[*src_dst, *key, *val], &[*src_dst]) + } + Instruction::RecordSpread { src_dst, items } => { + allocate(&[*src_dst, *items], &[*src_dst]) + } + Instruction::Not { src_dst } => allocate(&[*src_dst], &[*src_dst]), + Instruction::BinaryOp { + lhs_dst, + op: _, + rhs, + } => allocate(&[*lhs_dst, *rhs], &[*lhs_dst]), + Instruction::FollowCellPath { src_dst, path } => { + allocate(&[*src_dst, *path], &[*src_dst]) + } + Instruction::CloneCellPath { dst, src, path } => { + allocate(&[*src, *path], &[*src, *dst]) + } + Instruction::UpsertCellPath { + src_dst, + path, + new_value, + } => allocate(&[*src_dst, *path, *new_value], &[*src_dst]), + Instruction::Jump { index: _ } => Ok(()), + Instruction::BranchIf { 
cond, index: _ } => allocate(&[*cond], &[]), + Instruction::BranchIfEmpty { src, index: _ } => allocate(&[*src], &[*src]), + Instruction::Match { + pattern: _, + src, + index: _, + } => allocate(&[*src], &[*src]), + Instruction::CheckMatchGuard { src } => allocate(&[*src], &[*src]), + Instruction::Iterate { + dst, + stream, + end_index: _, + } => allocate(&[*stream], &[*dst, *stream]), + Instruction::OnError { index: _ } => Ok(()), + Instruction::OnErrorInto { index: _, dst } => allocate(&[], &[*dst]), + Instruction::PopErrorHandler => Ok(()), + Instruction::CheckExternalFailed { dst, src } => allocate(&[*src], &[*dst, *src]), + Instruction::ReturnEarly { src } => allocate(&[*src], &[]), + Instruction::Return { src } => allocate(&[*src], &[]), + }; + + // Add more context to the error + match allocate_result { + Ok(()) => (), + Err(CompileError::RegisterUninitialized { reg_id, caller }) => { + return Err(CompileError::RegisterUninitializedWhilePushingInstruction { + reg_id, + caller, + instruction: format!("{:?}", instruction.item), + span: instruction.span, + }); + } + Err(err) => return Err(err), + } + + self.instructions.push(instruction.item); + self.spans.push(instruction.span); + self.ast.push(None); + self.comments.push(String::new()); + Ok(()) + } + + /// Set the AST of the last instruction. Separate method because it's rarely used. + pub(crate) fn set_last_ast(&mut self, ast_ref: Option) { + *self.ast.last_mut().expect("no last instruction") = ast_ref; + } + + /// Add a comment to the last instruction. + pub(crate) fn add_comment(&mut self, comment: impl std::fmt::Display) { + add_comment( + self.comments.last_mut().expect("no last instruction"), + comment, + ) + } + + /// Load a register with a literal. 
+ pub(crate) fn load_literal( + &mut self, + reg_id: RegId, + literal: Spanned, + ) -> Result<(), CompileError> { + self.push( + Instruction::LoadLiteral { + dst: reg_id, + lit: literal.item, + } + .into_spanned(literal.span), + )?; + Ok(()) + } + + /// Allocate a new register and load a literal into it. + pub(crate) fn literal(&mut self, literal: Spanned) -> Result { + let reg_id = self.next_register()?; + self.load_literal(reg_id, literal)?; + Ok(reg_id) + } + + /// Deallocate a register and set it to `Empty`, if it is allocated + pub(crate) fn drop_reg(&mut self, reg_id: RegId) -> Result<(), CompileError> { + if self.is_allocated(reg_id) { + self.push(Instruction::Drop { src: reg_id }.into_spanned(Span::unknown()))?; + } + Ok(()) + } + + /// Set a register to `Empty`, but mark it as in-use, e.g. for input + pub(crate) fn load_empty(&mut self, reg_id: RegId) -> Result<(), CompileError> { + self.drop_reg(reg_id)?; + self.mark_register(reg_id) + } + + /// Drain the stream in a register (fully consuming it) + pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<(), CompileError> { + self.push(Instruction::Drain { src }.into_spanned(span)) + } + + /// Add data to the `data` array and return a [`DataSlice`] referencing it. + pub(crate) fn data(&mut self, data: impl AsRef<[u8]>) -> Result { + let data = data.as_ref(); + let start = self.data.len(); + if data.is_empty() { + Ok(DataSlice::empty()) + } else if start + data.len() < u32::MAX as usize { + let slice = DataSlice { + start: start as u32, + len: data.len() as u32, + }; + self.data.extend_from_slice(data); + Ok(slice) + } else { + Err(CompileError::DataOverflow { + block_span: self.block_span, + }) + } + } + + /// Clone a register with a `clone` instruction. 
+ pub(crate) fn clone_reg(&mut self, src: RegId, span: Span) -> Result { + let dst = self.next_register()?; + self.push(Instruction::Clone { dst, src }.into_spanned(span))?; + Ok(dst) + } + + /// Add a `branch-if` instruction + pub(crate) fn branch_if( + &mut self, + cond: RegId, + label_id: LabelId, + span: Span, + ) -> Result<(), CompileError> { + self.push( + Instruction::BranchIf { + cond, + index: label_id.0, + } + .into_spanned(span), + ) + } + + /// Add a `branch-if-empty` instruction + pub(crate) fn branch_if_empty( + &mut self, + src: RegId, + label_id: LabelId, + span: Span, + ) -> Result<(), CompileError> { + self.push( + Instruction::BranchIfEmpty { + src, + index: label_id.0, + } + .into_spanned(span), + ) + } + + /// Add a `jump` instruction + pub(crate) fn jump(&mut self, label_id: LabelId, span: Span) -> Result<(), CompileError> { + self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span)) + } + + /// The index that the next instruction [`.push()`]ed will have. + pub(crate) fn here(&self) -> usize { + self.instructions.len() + } + + /// Allocate a new file number, for redirection. + pub(crate) fn next_file_num(&mut self) -> Result { + let next = self.file_count; + self.file_count = self + .file_count + .checked_add(1) + .ok_or(CompileError::FileOverflow { + block_span: self.block_span, + })?; + Ok(next) + } + + /// Push a new loop state onto the builder. Creates new labels that must be set. + pub(crate) fn begin_loop(&mut self) -> Loop { + let loop_ = Loop { + break_label: self.label(None), + continue_label: self.label(None), + }; + self.loop_stack.push(loop_); + loop_ + } + + /// True if we are currently in a loop. + pub(crate) fn is_in_loop(&self) -> bool { + !self.loop_stack.is_empty() + } + + /// Add a loop breaking jump instruction. 
+ pub(crate) fn push_break(&mut self, span: Span) -> Result<(), CompileError> { + let loop_ = self + .loop_stack + .last() + .ok_or_else(|| CompileError::NotInALoop { + msg: "`break` called from outside of a loop".into(), + span: Some(span), + })?; + self.jump(loop_.break_label, span) + } + + /// Add a loop continuing jump instruction. + pub(crate) fn push_continue(&mut self, span: Span) -> Result<(), CompileError> { + let loop_ = self + .loop_stack + .last() + .ok_or_else(|| CompileError::NotInALoop { + msg: "`continue` called from outside of a loop".into(), + span: Some(span), + })?; + self.jump(loop_.continue_label, span) + } + + /// Pop the loop state. Checks that the loop being ended is the same one that was expected. + pub(crate) fn end_loop(&mut self, loop_: Loop) -> Result<(), CompileError> { + let ended_loop = self + .loop_stack + .pop() + .ok_or_else(|| CompileError::NotInALoop { + msg: "end_loop() called outside of a loop".into(), + span: None, + })?; + + if ended_loop == loop_ { + Ok(()) + } else { + Err(CompileError::IncoherentLoopState { + block_span: self.block_span, + }) + } + } + + /// Mark an unreachable code path. Produces an error at runtime if executed. + pub(crate) fn unreachable(&mut self, span: Span) -> Result<(), CompileError> { + self.push(Instruction::Unreachable.into_spanned(span)) + } + + /// Consume the builder and produce the final [`IrBlock`]. 
+ pub(crate) fn finish(mut self) -> Result { + // Add comments to label targets + for (index, label_target) in self.labels.iter().enumerate() { + if let Some(label_target) = label_target { + add_comment( + &mut self.comments[*label_target], + format_args!("label({index})"), + ); + } + } + + // Populate the actual target indices of labels into the instructions + for ((index, instruction), span) in + self.instructions.iter_mut().enumerate().zip(&self.spans) + { + if let Some(label_id) = instruction.branch_target() { + let target_index = self.labels.get(label_id).cloned().flatten().ok_or( + CompileError::UndefinedLabel { + label_id, + span: Some(*span), + }, + )?; + // Add a comment to the target index that we come from here + add_comment( + &mut self.comments[target_index], + format_args!("from({index}:)"), + ); + instruction.set_branch_target(target_index).map_err(|_| { + CompileError::SetBranchTargetOfNonBranchInstruction { + instruction: format!("{:?}", instruction), + span: *span, + } + })?; + } + } + + Ok(IrBlock { + instructions: self.instructions, + spans: self.spans, + data: self.data.into(), + ast: self.ast, + comments: self.comments.into_iter().map(|s| s.into()).collect(), + register_count: self + .register_allocation_state + .len() + .try_into() + .expect("register count overflowed in finish() despite previous checks"), + file_count: self.file_count, + }) + } +} + +/// Keeps track of the `break` and `continue` target labels for a loop. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct Loop { + pub(crate) break_label: LabelId, + pub(crate) continue_label: LabelId, +} + +/// Add a new comment to an existing one +fn add_comment(comment: &mut String, new_comment: impl std::fmt::Display) { + use std::fmt::Write; + write!( + comment, + "{}{}", + if comment.is_empty() { "" } else { ", " }, + new_comment + ) + .expect("formatting failed"); +} diff --git a/crates/nu-engine/src/compile/call.rs b/crates/nu-engine/src/compile/call.rs new file mode 100644 index 0000000000..d9f1b8e581 --- /dev/null +++ b/crates/nu-engine/src/compile/call.rs @@ -0,0 +1,270 @@ +use std::sync::Arc; + +use nu_protocol::{ + ast::{Argument, Call, Expression, ExternalArgument}, + engine::StateWorkingSet, + ir::{Instruction, IrAstRef, Literal}, + IntoSpanned, RegId, Span, Spanned, +}; + +use super::{compile_expression, keyword::*, BlockBuilder, CompileError, RedirectModes}; + +pub(crate) fn compile_call( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + let decl = working_set.get_decl(call.decl_id); + + // Check if this call has --help - if so, just redirect to `help` + if call.named_iter().any(|(name, _, _)| name.item == "help") { + return compile_help( + working_set, + builder, + decl.name().into_spanned(call.head), + io_reg, + ); + } + + // Try to figure out if this is a keyword call like `if`, and handle those specially + if decl.is_keyword() { + match decl.name() { + "if" => { + return compile_if(working_set, builder, call, redirect_modes, io_reg); + } + "match" => { + return compile_match(working_set, builder, call, redirect_modes, io_reg); + } + "const" => { + // This differs from the behavior of the const command, which adds the const value + // to the stack. Since `load-variable` also checks `engine_state` for the variable + // and will get a const value though, is it really necessary to do that? 
+ return builder.load_empty(io_reg); + } + "alias" => { + // Alias does nothing + return builder.load_empty(io_reg); + } + "let" | "mut" => { + return compile_let(working_set, builder, call, redirect_modes, io_reg); + } + "try" => { + return compile_try(working_set, builder, call, redirect_modes, io_reg); + } + "loop" => { + return compile_loop(working_set, builder, call, redirect_modes, io_reg); + } + "while" => { + return compile_while(working_set, builder, call, redirect_modes, io_reg); + } + "for" => { + return compile_for(working_set, builder, call, redirect_modes, io_reg); + } + "break" => { + return compile_break(working_set, builder, call, redirect_modes, io_reg); + } + "continue" => { + return compile_continue(working_set, builder, call, redirect_modes, io_reg); + } + "return" => { + return compile_return(working_set, builder, call, redirect_modes, io_reg); + } + _ => (), + } + } + + // Keep AST if the decl needs it. + let requires_ast = decl.requires_ast_for_arguments(); + + // It's important that we evaluate the args first before trying to set up the argument + // state for the call. + // + // We could technically compile anything that isn't another call safely without worrying about + // the argument state, but we'd have to check all of that first and it just isn't really worth + // it. 
+ enum CompiledArg<'a> { + Positional(RegId, Span, Option), + Named( + &'a str, + Option<&'a str>, + Option, + Span, + Option, + ), + Spread(RegId, Span, Option), + } + + let mut compiled_args = vec![]; + + for arg in &call.arguments { + let arg_reg = arg + .expr() + .map(|expr| { + let arg_reg = builder.next_register()?; + + compile_expression( + working_set, + builder, + expr, + RedirectModes::capture_out(arg.span()), + None, + arg_reg, + )?; + + Ok(arg_reg) + }) + .transpose()?; + + let ast_ref = arg + .expr() + .filter(|_| requires_ast) + .map(|expr| IrAstRef(Arc::new(expr.clone()))); + + match arg { + Argument::Positional(_) | Argument::Unknown(_) => { + compiled_args.push(CompiledArg::Positional( + arg_reg.expect("expr() None in non-Named"), + arg.span(), + ast_ref, + )) + } + Argument::Named((name, short, _)) => compiled_args.push(CompiledArg::Named( + &name.item, + short.as_ref().map(|spanned| spanned.item.as_str()), + arg_reg, + arg.span(), + ast_ref, + )), + Argument::Spread(_) => compiled_args.push(CompiledArg::Spread( + arg_reg.expect("expr() None in non-Named"), + arg.span(), + ast_ref, + )), + } + } + + // Now that the args are all compiled, set up the call state (argument stack and redirections) + for arg in compiled_args { + match arg { + CompiledArg::Positional(reg, span, ast_ref) => { + builder.push(Instruction::PushPositional { src: reg }.into_spanned(span))?; + builder.set_last_ast(ast_ref); + } + CompiledArg::Named(name, short, Some(reg), span, ast_ref) => { + if !name.is_empty() { + let name = builder.data(name)?; + builder.push(Instruction::PushNamed { name, src: reg }.into_spanned(span))?; + } else { + let short = builder.data(short.unwrap_or(""))?; + builder + .push(Instruction::PushShortNamed { short, src: reg }.into_spanned(span))?; + } + builder.set_last_ast(ast_ref); + } + CompiledArg::Named(name, short, None, span, ast_ref) => { + if !name.is_empty() { + let name = builder.data(name)?; + builder.push(Instruction::PushFlag { name 
}.into_spanned(span))?; + } else { + let short = builder.data(short.unwrap_or(""))?; + builder.push(Instruction::PushShortFlag { short }.into_spanned(span))?; + } + builder.set_last_ast(ast_ref); + } + CompiledArg::Spread(reg, span, ast_ref) => { + builder.push(Instruction::AppendRest { src: reg }.into_spanned(span))?; + builder.set_last_ast(ast_ref); + } + } + } + + // Add any parser info from the call + for (name, info) in &call.parser_info { + let name = builder.data(name)?; + let info = Box::new(info.clone()); + builder.push(Instruction::PushParserInfo { name, info }.into_spanned(call.head))?; + } + + if let Some(mode) = redirect_modes.out { + builder.push(mode.map(|mode| Instruction::RedirectOut { mode }))?; + } + + if let Some(mode) = redirect_modes.err { + builder.push(mode.map(|mode| Instruction::RedirectErr { mode }))?; + } + + // The state is set up, so we can do the call into io_reg + builder.push( + Instruction::Call { + decl_id: call.decl_id, + src_dst: io_reg, + } + .into_spanned(call.head), + )?; + + Ok(()) +} + +pub(crate) fn compile_help( + working_set: &StateWorkingSet<'_>, + builder: &mut BlockBuilder, + decl_name: Spanned<&str>, + io_reg: RegId, +) -> Result<(), CompileError> { + let help_command_id = + working_set + .find_decl(b"help") + .ok_or_else(|| CompileError::MissingRequiredDeclaration { + decl_name: "help".into(), + span: decl_name.span, + })?; + + let name_data = builder.data(decl_name.item)?; + let name_literal = builder.literal(decl_name.map(|_| Literal::String(name_data)))?; + + builder.push(Instruction::PushPositional { src: name_literal }.into_spanned(decl_name.span))?; + + builder.push( + Instruction::Call { + decl_id: help_command_id, + src_dst: io_reg, + } + .into_spanned(decl_name.span), + )?; + + Ok(()) +} + +pub(crate) fn compile_external_call( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + head: &Expression, + args: &[ExternalArgument], + redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), 
CompileError> { + // Pass everything to run-external + let run_external_id = working_set + .find_decl(b"run-external") + .ok_or(CompileError::RunExternalNotFound { span: head.span })?; + + let mut call = Call::new(head.span); + call.decl_id = run_external_id; + + call.arguments.push(Argument::Positional(head.clone())); + + for arg in args { + match arg { + ExternalArgument::Regular(expr) => { + call.arguments.push(Argument::Positional(expr.clone())); + } + ExternalArgument::Spread(expr) => { + call.arguments.push(Argument::Spread(expr.clone())); + } + } + } + + compile_call(working_set, builder, &call, redirect_modes, io_reg) +} diff --git a/crates/nu-engine/src/compile/expression.rs b/crates/nu-engine/src/compile/expression.rs new file mode 100644 index 0000000000..38ee58ea26 --- /dev/null +++ b/crates/nu-engine/src/compile/expression.rs @@ -0,0 +1,535 @@ +use super::{ + compile_binary_op, compile_block, compile_call, compile_external_call, compile_load_env, + BlockBuilder, CompileError, RedirectModes, +}; + +use nu_protocol::{ + ast::{CellPath, Expr, Expression, ListItem, RecordItem, ValueWithUnit}, + engine::StateWorkingSet, + ir::{DataSlice, Instruction, Literal}, + IntoSpanned, RegId, Span, Value, ENV_VARIABLE_ID, +}; + +pub(crate) fn compile_expression( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + expr: &Expression, + redirect_modes: RedirectModes, + in_reg: Option, + out_reg: RegId, +) -> Result<(), CompileError> { + let drop_input = |builder: &mut BlockBuilder| { + if let Some(in_reg) = in_reg { + if in_reg != out_reg { + builder.drop_reg(in_reg)?; + } + } + Ok(()) + }; + + let lit = |builder: &mut BlockBuilder, literal: Literal| { + drop_input(builder)?; + + builder + .push( + Instruction::LoadLiteral { + dst: out_reg, + lit: literal, + } + .into_spanned(expr.span), + ) + .map(|_| ()) + }; + + let ignore = |builder: &mut BlockBuilder| { + drop_input(builder)?; + builder.load_empty(out_reg) + }; + + let unexpected = |expr_name: &str| 
CompileError::UnexpectedExpression { + expr_name: expr_name.into(), + span: expr.span, + }; + + let move_in_reg_to_out_reg = |builder: &mut BlockBuilder| { + // Ensure that out_reg contains the input value, because a call only uses one register + if let Some(in_reg) = in_reg { + if in_reg != out_reg { + // Have to move in_reg to out_reg so it can be used + builder.push( + Instruction::Move { + dst: out_reg, + src: in_reg, + } + .into_spanned(expr.span), + )?; + } + } else { + // Will have to initialize out_reg with Empty first + builder.load_empty(out_reg)?; + } + Ok(()) + }; + + match &expr.expr { + Expr::Bool(b) => lit(builder, Literal::Bool(*b)), + Expr::Int(i) => lit(builder, Literal::Int(*i)), + Expr::Float(f) => lit(builder, Literal::Float(*f)), + Expr::Binary(bin) => { + let data_slice = builder.data(bin)?; + lit(builder, Literal::Binary(data_slice)) + } + Expr::Range(range) => { + // Compile the subexpressions of the range + let compile_part = |builder: &mut BlockBuilder, + part_expr: Option<&Expression>| + -> Result { + let reg = builder.next_register()?; + if let Some(part_expr) = part_expr { + compile_expression( + working_set, + builder, + part_expr, + RedirectModes::capture_out(part_expr.span), + None, + reg, + )?; + } else { + builder.load_literal(reg, Literal::Nothing.into_spanned(expr.span))?; + } + Ok(reg) + }; + + drop_input(builder)?; + + let start = compile_part(builder, range.from.as_ref())?; + let step = compile_part(builder, range.next.as_ref())?; + let end = compile_part(builder, range.to.as_ref())?; + + // Assemble the range + builder.load_literal( + out_reg, + Literal::Range { + start, + step, + end, + inclusion: range.operator.inclusion, + } + .into_spanned(expr.span), + ) + } + Expr::Var(var_id) => { + drop_input(builder)?; + builder.push( + Instruction::LoadVariable { + dst: out_reg, + var_id: *var_id, + } + .into_spanned(expr.span), + )?; + Ok(()) + } + Expr::VarDecl(_) => Err(unexpected("VarDecl")), + Expr::Call(call) => { + 
move_in_reg_to_out_reg(builder)?; + + compile_call(working_set, builder, call, redirect_modes, out_reg) + } + Expr::ExternalCall(head, args) => { + move_in_reg_to_out_reg(builder)?; + + compile_external_call(working_set, builder, head, args, redirect_modes, out_reg) + } + Expr::Operator(_) => Err(unexpected("Operator")), + Expr::RowCondition(block_id) => lit(builder, Literal::RowCondition(*block_id)), + Expr::UnaryNot(subexpr) => { + drop_input(builder)?; + compile_expression( + working_set, + builder, + subexpr, + RedirectModes::capture_out(subexpr.span), + None, + out_reg, + )?; + builder.push(Instruction::Not { src_dst: out_reg }.into_spanned(expr.span))?; + Ok(()) + } + Expr::BinaryOp(lhs, op, rhs) => { + if let Expr::Operator(ref operator) = op.expr { + drop_input(builder)?; + compile_binary_op( + working_set, + builder, + lhs, + operator.clone().into_spanned(op.span), + rhs, + expr.span, + out_reg, + ) + } else { + Err(CompileError::UnsupportedOperatorExpression { span: op.span }) + } + } + Expr::Subexpression(block_id) => { + let block = working_set.get_block(*block_id); + compile_block(working_set, builder, block, redirect_modes, in_reg, out_reg) + } + Expr::Block(block_id) => lit(builder, Literal::Block(*block_id)), + Expr::Closure(block_id) => lit(builder, Literal::Closure(*block_id)), + Expr::MatchBlock(_) => Err(unexpected("MatchBlock")), // only for `match` keyword + Expr::List(items) => { + // Guess capacity based on items (does not consider spread as more than 1) + lit( + builder, + Literal::List { + capacity: items.len(), + }, + )?; + for item in items { + // Compile the expression of the item / spread + let reg = builder.next_register()?; + let expr = match item { + ListItem::Item(expr) | ListItem::Spread(_, expr) => expr, + }; + compile_expression( + working_set, + builder, + expr, + RedirectModes::capture_out(expr.span), + None, + reg, + )?; + + match item { + ListItem::Item(_) => { + // Add each item using list-push + builder.push( + 
Instruction::ListPush { + src_dst: out_reg, + item: reg, + } + .into_spanned(expr.span), + )?; + } + ListItem::Spread(spread_span, _) => { + // Spread the list using list-spread + builder.push( + Instruction::ListSpread { + src_dst: out_reg, + items: reg, + } + .into_spanned(*spread_span), + )?; + } + } + } + Ok(()) + } + Expr::Table(table) => { + lit( + builder, + Literal::List { + capacity: table.rows.len(), + }, + )?; + + // Evaluate the columns + let column_registers = table + .columns + .iter() + .map(|column| { + let reg = builder.next_register()?; + compile_expression( + working_set, + builder, + column, + RedirectModes::capture_out(column.span), + None, + reg, + )?; + Ok(reg) + }) + .collect::, CompileError>>()?; + + // Build records for each row + for row in table.rows.iter() { + let row_reg = builder.next_register()?; + builder.load_literal( + row_reg, + Literal::Record { + capacity: table.columns.len(), + } + .into_spanned(expr.span), + )?; + for (column_reg, item) in column_registers.iter().zip(row.iter()) { + let column_reg = builder.clone_reg(*column_reg, item.span)?; + let item_reg = builder.next_register()?; + compile_expression( + working_set, + builder, + item, + RedirectModes::capture_out(item.span), + None, + item_reg, + )?; + builder.push( + Instruction::RecordInsert { + src_dst: row_reg, + key: column_reg, + val: item_reg, + } + .into_spanned(item.span), + )?; + } + builder.push( + Instruction::ListPush { + src_dst: out_reg, + item: row_reg, + } + .into_spanned(expr.span), + )?; + } + + // Free the column registers, since they aren't needed anymore + for reg in column_registers { + builder.drop_reg(reg)?; + } + + Ok(()) + } + Expr::Record(items) => { + lit( + builder, + Literal::Record { + capacity: items.len(), + }, + )?; + + for item in items { + match item { + RecordItem::Pair(key, val) => { + // Add each item using record-insert + let key_reg = builder.next_register()?; + let val_reg = builder.next_register()?; + compile_expression( + 
working_set, + builder, + key, + RedirectModes::capture_out(key.span), + None, + key_reg, + )?; + compile_expression( + working_set, + builder, + val, + RedirectModes::capture_out(val.span), + None, + val_reg, + )?; + builder.push( + Instruction::RecordInsert { + src_dst: out_reg, + key: key_reg, + val: val_reg, + } + .into_spanned(expr.span), + )?; + } + RecordItem::Spread(spread_span, expr) => { + // Spread the expression using record-spread + let reg = builder.next_register()?; + compile_expression( + working_set, + builder, + expr, + RedirectModes::capture_out(expr.span), + None, + reg, + )?; + builder.push( + Instruction::RecordSpread { + src_dst: out_reg, + items: reg, + } + .into_spanned(*spread_span), + )?; + } + } + } + Ok(()) + } + Expr::Keyword(kw) => { + // keyword: just pass through expr, since commands that use it and are not being + // specially handled already are often just positional anyway + compile_expression( + working_set, + builder, + &kw.expr, + redirect_modes, + in_reg, + out_reg, + ) + } + Expr::ValueWithUnit(value_with_unit) => { + lit(builder, literal_from_value_with_unit(value_with_unit)?) 
+ } + Expr::DateTime(dt) => lit(builder, Literal::Date(Box::new(*dt))), + Expr::Filepath(path, no_expand) => { + let val = builder.data(path)?; + lit( + builder, + Literal::Filepath { + val, + no_expand: *no_expand, + }, + ) + } + Expr::Directory(path, no_expand) => { + let val = builder.data(path)?; + lit( + builder, + Literal::Directory { + val, + no_expand: *no_expand, + }, + ) + } + Expr::GlobPattern(path, no_expand) => { + let val = builder.data(path)?; + lit( + builder, + Literal::GlobPattern { + val, + no_expand: *no_expand, + }, + ) + } + Expr::String(s) => { + let data_slice = builder.data(s)?; + lit(builder, Literal::String(data_slice)) + } + Expr::RawString(rs) => { + let data_slice = builder.data(rs)?; + lit(builder, Literal::RawString(data_slice)) + } + Expr::CellPath(path) => lit(builder, Literal::CellPath(Box::new(path.clone()))), + Expr::FullCellPath(full_cell_path) => { + if matches!(full_cell_path.head.expr, Expr::Var(ENV_VARIABLE_ID)) { + compile_load_env(builder, expr.span, &full_cell_path.tail, out_reg) + } else { + compile_expression( + working_set, + builder, + &full_cell_path.head, + RedirectModes::capture_out(expr.span), + in_reg, + out_reg, + )?; + // Only do the follow if this is actually needed + if !full_cell_path.tail.is_empty() { + let cell_path_reg = builder.literal( + Literal::CellPath(Box::new(CellPath { + members: full_cell_path.tail.clone(), + })) + .into_spanned(expr.span), + )?; + builder.push( + Instruction::FollowCellPath { + src_dst: out_reg, + path: cell_path_reg, + } + .into_spanned(expr.span), + )?; + } + Ok(()) + } + } + Expr::ImportPattern(_) => Err(unexpected("ImportPattern")), + Expr::Overlay(_) => Err(unexpected("Overlay")), + Expr::Signature(_) => ignore(builder), // no effect + Expr::StringInterpolation(exprs) | Expr::GlobInterpolation(exprs, _) => { + let mut exprs_iter = exprs.iter().peekable(); + + if exprs_iter + .peek() + .is_some_and(|e| matches!(e.expr, Expr::String(..) 
| Expr::RawString(..))) + { + // If the first expression is a string or raw string literal, just take it and build + // from that + compile_expression( + working_set, + builder, + exprs_iter.next().expect("peek() was Some"), + RedirectModes::capture_out(expr.span), + None, + out_reg, + )?; + } else { + // Start with an empty string + lit(builder, Literal::String(DataSlice::empty()))?; + } + + // Compile each expression and append to out_reg + for expr in exprs_iter { + let scratch_reg = builder.next_register()?; + compile_expression( + working_set, + builder, + expr, + RedirectModes::capture_out(expr.span), + None, + scratch_reg, + )?; + builder.push( + Instruction::StringAppend { + src_dst: out_reg, + val: scratch_reg, + } + .into_spanned(expr.span), + )?; + } + + // If it's a glob interpolation, change it to a glob + if let Expr::GlobInterpolation(_, no_expand) = expr.expr { + builder.push( + Instruction::GlobFrom { + src_dst: out_reg, + no_expand, + } + .into_spanned(expr.span), + )?; + } + + Ok(()) + } + Expr::Nothing => lit(builder, Literal::Nothing), + Expr::Garbage => Err(CompileError::Garbage { span: expr.span }), + } +} + +fn literal_from_value_with_unit(value_with_unit: &ValueWithUnit) -> Result { + let Expr::Int(int_value) = value_with_unit.expr.expr else { + return Err(CompileError::UnexpectedExpression { + expr_name: format!("{:?}", value_with_unit.expr), + span: value_with_unit.expr.span, + }); + }; + + match value_with_unit + .unit + .item + .build_value(int_value, Span::unknown()) + .map_err(|err| CompileError::InvalidLiteral { + msg: err.to_string(), + span: value_with_unit.expr.span, + })? { + Value::Filesize { val, .. } => Ok(Literal::Filesize(val)), + Value::Duration { val, .. 
} => Ok(Literal::Duration(val)), + other => Err(CompileError::InvalidLiteral { + msg: format!("bad value returned by Unit::build_value(): {other:?}"), + span: value_with_unit.unit.span, + }), + } +} diff --git a/crates/nu-engine/src/compile/keyword.rs b/crates/nu-engine/src/compile/keyword.rs new file mode 100644 index 0000000000..12f4a54c10 --- /dev/null +++ b/crates/nu-engine/src/compile/keyword.rs @@ -0,0 +1,902 @@ +use nu_protocol::{ + ast::{Block, Call, Expr, Expression}, + engine::StateWorkingSet, + ir::Instruction, + IntoSpanned, RegId, Type, VarId, +}; + +use super::{compile_block, compile_expression, BlockBuilder, CompileError, RedirectModes}; + +/// Compile a call to `if` as a branch-if +pub(crate) fn compile_if( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // %io_reg <- + // not %io_reg + // branch-if %io_reg, FALSE + // TRUE: ...... + // jump END + // FALSE: ...... 
OR drop %io_reg + // END: + let invalid = || CompileError::InvalidKeywordCall { + keyword: "if".into(), + span: call.head, + }; + + let condition = call.positional_nth(0).ok_or_else(invalid)?; + let true_block_arg = call.positional_nth(1).ok_or_else(invalid)?; + let else_arg = call.positional_nth(2); + + let true_block_id = true_block_arg.as_block().ok_or_else(invalid)?; + let true_block = working_set.get_block(true_block_id); + + let true_label = builder.label(None); + let false_label = builder.label(None); + let end_label = builder.label(None); + + let not_condition_reg = { + // Compile the condition first + let condition_reg = builder.next_register()?; + compile_expression( + working_set, + builder, + condition, + RedirectModes::capture_out(condition.span), + None, + condition_reg, + )?; + + // Negate the condition - we basically only want to jump if the condition is false + builder.push( + Instruction::Not { + src_dst: condition_reg, + } + .into_spanned(call.head), + )?; + + condition_reg + }; + + // Set up a branch if the condition is false. + builder.branch_if(not_condition_reg, false_label, call.head)?; + builder.add_comment("if false"); + + // Compile the true case + builder.set_label(true_label, builder.here())?; + compile_block( + working_set, + builder, + true_block, + redirect_modes.clone(), + Some(io_reg), + io_reg, + )?; + + // Add a jump over the false case + builder.jump(end_label, else_arg.map(|e| e.span).unwrap_or(call.head))?; + builder.add_comment("end if"); + + // On the else side now, assert that io_reg is still valid + builder.set_label(false_label, builder.here())?; + builder.mark_register(io_reg)?; + + if let Some(else_arg) = else_arg { + let Expression { + expr: Expr::Keyword(else_keyword), + .. 
+ } = else_arg + else { + return Err(invalid()); + }; + + if else_keyword.keyword.as_ref() != b"else" { + return Err(invalid()); + } + + let else_expr = &else_keyword.expr; + + match &else_expr.expr { + Expr::Block(block_id) => { + let false_block = working_set.get_block(*block_id); + compile_block( + working_set, + builder, + false_block, + redirect_modes, + Some(io_reg), + io_reg, + )?; + } + _ => { + // The else case supports bare expressions too, not only blocks + compile_expression( + working_set, + builder, + else_expr, + redirect_modes, + Some(io_reg), + io_reg, + )?; + } + } + } else { + // We don't have an else expression/block, so just set io_reg = Empty + builder.load_empty(io_reg)?; + } + + // Set the end label + builder.set_label(end_label, builder.here())?; + + Ok(()) +} + +/// Compile a call to `match` +pub(crate) fn compile_match( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // %match_reg <- + // collect %match_reg + // match (pat1), %match_reg, PAT1 + // MATCH2: match (pat2), %match_reg, PAT2 + // FAIL: drop %io_reg + // drop %match_reg + // jump END + // PAT1: %guard_reg <- + // check-match-guard %guard_reg + // not %guard_reg + // branch-if %guard_reg, MATCH2 + // drop %match_reg + // <...expr...> + // jump END + // PAT2: drop %match_reg + // <...expr...> + // jump END + // END: + let invalid = || CompileError::InvalidKeywordCall { + keyword: "match".into(), + span: call.head, + }; + + let match_expr = call.positional_nth(0).ok_or_else(invalid)?; + + let match_block_arg = call.positional_nth(1).ok_or_else(invalid)?; + let match_block = match_block_arg.as_match_block().ok_or_else(invalid)?; + + let match_reg = builder.next_register()?; + + // Evaluate the match expression (patterns will be checked against this). 
+ compile_expression( + working_set, + builder, + match_expr, + RedirectModes::capture_out(match_expr.span), + None, + match_reg, + )?; + + // Important to collect it first + builder.push(Instruction::Collect { src_dst: match_reg }.into_spanned(match_expr.span))?; + + // Generate the `match` instructions. Guards are not used at this stage. + let mut match_labels = Vec::with_capacity(match_block.len()); + let mut next_labels = Vec::with_capacity(match_block.len()); + let end_label = builder.label(None); + + for (pattern, _) in match_block { + let match_label = builder.label(None); + match_labels.push(match_label); + builder.push( + Instruction::Match { + pattern: Box::new(pattern.pattern.clone()), + src: match_reg, + index: match_label.0, + } + .into_spanned(pattern.span), + )?; + // Also add a label for the next match instruction or failure case + next_labels.push(builder.label(Some(builder.here()))); + } + + // Match fall-through to jump to the end, if no match + builder.load_empty(io_reg)?; + builder.drop_reg(match_reg)?; + builder.jump(end_label, call.head)?; + + // Generate each of the match expressions. Handle guards here, if present. 
+ for (index, (pattern, expr)) in match_block.iter().enumerate() { + let match_label = match_labels[index]; + let next_label = next_labels[index]; + + // `io_reg` and `match_reg` are still valid at each of these branch targets + builder.mark_register(io_reg)?; + builder.mark_register(match_reg)?; + + // Set the original match instruction target here + builder.set_label(match_label, builder.here())?; + + // Handle guard, if present + if let Some(guard) = &pattern.guard { + let guard_reg = builder.next_register()?; + compile_expression( + working_set, + builder, + guard, + RedirectModes::capture_out(guard.span), + None, + guard_reg, + )?; + builder + .push(Instruction::CheckMatchGuard { src: guard_reg }.into_spanned(guard.span))?; + builder.push(Instruction::Not { src_dst: guard_reg }.into_spanned(guard.span))?; + // Branch to the next match instruction if the branch fails to match + builder.branch_if( + guard_reg, + next_label, + // Span the branch with the next pattern, or the head if this is the end + match_block + .get(index + 1) + .map(|b| b.0.span) + .unwrap_or(call.head), + )?; + builder.add_comment("if match guard false"); + } + + // match_reg no longer needed, successful match + builder.drop_reg(match_reg)?; + + // Execute match right hand side expression + if let Some(block_id) = expr.as_block() { + let block = working_set.get_block(block_id); + compile_block( + working_set, + builder, + block, + redirect_modes.clone(), + Some(io_reg), + io_reg, + )?; + } else { + compile_expression( + working_set, + builder, + expr, + redirect_modes.clone(), + Some(io_reg), + io_reg, + )?; + } + + // Jump to the end after the match logic is done + builder.jump(end_label, call.head)?; + builder.add_comment("end match"); + } + + // Set the end destination + builder.set_label(end_label, builder.here())?; + + Ok(()) +} + +/// Compile a call to `let` or `mut` (just do store-variable) +pub(crate) fn compile_let( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + 
call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // %io_reg <- ...... <- %io_reg + // store-variable $var, %io_reg + let invalid = || CompileError::InvalidKeywordCall { + keyword: "let".into(), + span: call.head, + }; + + let var_decl_arg = call.positional_nth(0).ok_or_else(invalid)?; + let block_arg = call.positional_nth(1).ok_or_else(invalid)?; + + let var_id = var_decl_arg.as_var().ok_or_else(invalid)?; + let block_id = block_arg.as_block().ok_or_else(invalid)?; + let block = working_set.get_block(block_id); + + let variable = working_set.get_variable(var_id); + + compile_block( + working_set, + builder, + block, + RedirectModes::capture_out(call.head), + Some(io_reg), + io_reg, + )?; + + // If the variable is a glob type variable, we should cast it with GlobFrom + if variable.ty == Type::Glob { + builder.push( + Instruction::GlobFrom { + src_dst: io_reg, + no_expand: true, + } + .into_spanned(call.head), + )?; + } + + builder.push( + Instruction::StoreVariable { + var_id, + src: io_reg, + } + .into_spanned(call.head), + )?; + builder.add_comment("let"); + + // Don't forget to set io_reg to Empty afterward, as that's the result of an assignment + builder.load_empty(io_reg)?; + + Ok(()) +} + +/// Compile a call to `try`, setting an error handler over the evaluated block +pub(crate) fn compile_try( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode (literal block): + // + // on-error-into ERR, %io_reg // or without + // %io_reg <- <...block...> <- %io_reg + // check-external-failed %failed_reg, %io_reg + // branch-if %failed_reg, FAIL + // pop-error-handler + // jump END + // FAIL: drain %io_reg + // unreachable + // ERR: clone %err_reg, %io_reg + // store-variable $err_var, %err_reg // or without + // %io_reg <- <...catch block...> <- %io_reg // set to empty if no 
catch block + // END: + // + // with expression that can't be inlined: + // + // %closure_reg <- + // on-error-into ERR, %io_reg + // %io_reg <- <...block...> <- %io_reg + // check-external-failed %failed_reg, %io_reg + // branch-if %failed_reg, FAIL + // pop-error-handler + // jump END + // FAIL: drain %io_reg + // unreachable + // ERR: clone %err_reg, %io_reg + // push-positional %closure_reg + // push-positional %err_reg + // call "do", %io_reg + // END: + let invalid = || CompileError::InvalidKeywordCall { + keyword: "try".into(), + span: call.head, + }; + + let block_arg = call.positional_nth(0).ok_or_else(invalid)?; + let block_id = block_arg.as_block().ok_or_else(invalid)?; + let block = working_set.get_block(block_id); + + let catch_expr = match call.positional_nth(1) { + Some(kw_expr) => Some(kw_expr.as_keyword().ok_or_else(invalid)?), + None => None, + }; + let catch_span = catch_expr.map(|e| e.span).unwrap_or(call.head); + + let err_label = builder.label(None); + let failed_label = builder.label(None); + let end_label = builder.label(None); + + // We have two ways of executing `catch`: if it was provided as a literal, we can inline it. + // Otherwise, we have to evaluate the expression and keep it as a register, and then call `do`. 
+ enum CatchType<'a> { + Block { + block: &'a Block, + var_id: Option, + }, + Closure { + closure_reg: RegId, + }, + } + + let catch_type = catch_expr + .map(|catch_expr| match catch_expr.as_block() { + Some(block_id) => { + let block = working_set.get_block(block_id); + let var_id = block.signature.get_positional(0).and_then(|v| v.var_id); + Ok(CatchType::Block { block, var_id }) + } + None => { + // We have to compile the catch_expr and use it as a closure + let closure_reg = builder.next_register()?; + compile_expression( + working_set, + builder, + catch_expr, + RedirectModes::capture_out(catch_expr.span), + None, + closure_reg, + )?; + Ok(CatchType::Closure { closure_reg }) + } + }) + .transpose()?; + + // Put the error handler instruction. If we have a catch expression then we should capture the + // error. + if catch_type.is_some() { + builder.push( + Instruction::OnErrorInto { + index: err_label.0, + dst: io_reg, + } + .into_spanned(call.head), + )? + } else { + // Otherwise, we don't need the error value. + builder.push(Instruction::OnError { index: err_label.0 }.into_spanned(call.head))? + }; + + builder.add_comment("try"); + + // Compile the block + compile_block( + working_set, + builder, + block, + redirect_modes.clone(), + Some(io_reg), + io_reg, + )?; + + // Check for external command exit code failure, and also redirect that to the catch handler + let failed_reg = builder.next_register()?; + builder.push( + Instruction::CheckExternalFailed { + dst: failed_reg, + src: io_reg, + } + .into_spanned(catch_span), + )?; + builder.branch_if(failed_reg, failed_label, catch_span)?; + + // Successful case: pop the error handler + builder.push(Instruction::PopErrorHandler.into_spanned(call.head))?; + + // Jump over the failure case + builder.jump(end_label, catch_span)?; + + // Set up an error handler preamble for failed external. 
+ // Draining the %io_reg results in the error handler being called with Empty, and sets + // $env.LAST_EXIT_CODE + builder.set_label(failed_label, builder.here())?; + builder.drain(io_reg, catch_span)?; + builder.add_comment("branches to err"); + builder.unreachable(catch_span)?; + + // This is the real error handler + builder.set_label(err_label, builder.here())?; + + // Mark out register as likely not clean - state in error handler is not well defined + builder.mark_register(io_reg)?; + + // Now compile whatever is necessary for the error handler + match catch_type { + Some(CatchType::Block { block, var_id }) => { + // Error will be in io_reg + builder.mark_register(io_reg)?; + if let Some(var_id) = var_id { + // Take a copy of the error as $err, since it will also be input + let err_reg = builder.next_register()?; + builder.push( + Instruction::Clone { + dst: err_reg, + src: io_reg, + } + .into_spanned(catch_span), + )?; + builder.push( + Instruction::StoreVariable { + var_id, + src: err_reg, + } + .into_spanned(catch_span), + )?; + } + // Compile the block, now that the variable is set + compile_block( + working_set, + builder, + block, + redirect_modes, + Some(io_reg), + io_reg, + )?; + } + Some(CatchType::Closure { closure_reg }) => { + // We should call `do`. 
Error will be in io_reg + let do_decl_id = working_set.find_decl(b"do").ok_or_else(|| { + CompileError::MissingRequiredDeclaration { + decl_name: "do".into(), + span: call.head, + } + })?; + + // Take a copy of io_reg, because we pass it both as an argument and input + builder.mark_register(io_reg)?; + let err_reg = builder.next_register()?; + builder.push( + Instruction::Clone { + dst: err_reg, + src: io_reg, + } + .into_spanned(catch_span), + )?; + + // Push the closure and the error + builder + .push(Instruction::PushPositional { src: closure_reg }.into_spanned(catch_span))?; + builder.push(Instruction::PushPositional { src: err_reg }.into_spanned(catch_span))?; + + // Call `$err | do $closure $err` + builder.push( + Instruction::Call { + decl_id: do_decl_id, + src_dst: io_reg, + } + .into_spanned(catch_span), + )?; + } + None => { + // Just set out to empty. + builder.load_empty(io_reg)?; + } + } + + // This is the end - if we succeeded, should jump here + builder.set_label(end_label, builder.here())?; + + Ok(()) +} + +/// Compile a call to `loop` (via `jump`) +pub(crate) fn compile_loop( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // drop %io_reg + // LOOP: %io_reg <- ...... 
+ // drain %io_reg + // jump %LOOP + // END: drop %io_reg + let invalid = || CompileError::InvalidKeywordCall { + keyword: "loop".into(), + span: call.head, + }; + + let block_arg = call.positional_nth(0).ok_or_else(invalid)?; + let block_id = block_arg.as_block().ok_or_else(invalid)?; + let block = working_set.get_block(block_id); + + let loop_ = builder.begin_loop(); + builder.load_empty(io_reg)?; + + builder.set_label(loop_.continue_label, builder.here())?; + + compile_block( + working_set, + builder, + block, + RedirectModes::default(), + None, + io_reg, + )?; + + // Drain the output, just like for a semicolon + builder.drain(io_reg, call.head)?; + + builder.jump(loop_.continue_label, call.head)?; + builder.add_comment("loop"); + + builder.set_label(loop_.break_label, builder.here())?; + builder.end_loop(loop_)?; + + // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's + // empty. + builder.mark_register(io_reg)?; + builder.load_empty(io_reg)?; + + Ok(()) +} + +/// Compile a call to `while`, via branch instructions +pub(crate) fn compile_while( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // LOOP: %io_reg <- + // branch-if %io_reg, TRUE + // jump FALSE + // TRUE: %io_reg <- ...... 
+ // drain %io_reg + // jump LOOP + // FALSE: drop %io_reg + let invalid = || CompileError::InvalidKeywordCall { + keyword: "while".into(), + span: call.head, + }; + + let cond_arg = call.positional_nth(0).ok_or_else(invalid)?; + let block_arg = call.positional_nth(1).ok_or_else(invalid)?; + let block_id = block_arg.as_block().ok_or_else(invalid)?; + let block = working_set.get_block(block_id); + + let loop_ = builder.begin_loop(); + builder.set_label(loop_.continue_label, builder.here())?; + + let true_label = builder.label(None); + + compile_expression( + working_set, + builder, + cond_arg, + RedirectModes::capture_out(call.head), + None, + io_reg, + )?; + + builder.branch_if(io_reg, true_label, call.head)?; + builder.add_comment("while"); + builder.jump(loop_.break_label, call.head)?; + builder.add_comment("end while"); + + builder.set_label(true_label, builder.here())?; + + compile_block( + working_set, + builder, + block, + RedirectModes::default(), + None, + io_reg, + )?; + + // Drain the result, just like for a semicolon + builder.drain(io_reg, call.head)?; + + builder.jump(loop_.continue_label, call.head)?; + builder.add_comment("while"); + + builder.set_label(loop_.break_label, builder.here())?; + builder.end_loop(loop_)?; + + // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's + // empty. 
+ builder.mark_register(io_reg)?; + builder.load_empty(io_reg)?; + + Ok(()) +} + +/// Compile a call to `for` (via `iterate`) +pub(crate) fn compile_for( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // %stream_reg <- + // LOOP: iterate %io_reg, %stream_reg, END + // store-variable $var, %io_reg + // %io_reg <- <...block...> + // drain %io_reg + // jump LOOP + // END: drop %io_reg + let invalid = || CompileError::InvalidKeywordCall { + keyword: "for".into(), + span: call.head, + }; + + if call.get_named_arg("numbered").is_some() { + // This is deprecated and we don't support it. + return Err(invalid()); + } + + let var_decl_arg = call.positional_nth(0).ok_or_else(invalid)?; + let var_id = var_decl_arg.as_var().ok_or_else(invalid)?; + + let in_arg = call.positional_nth(1).ok_or_else(invalid)?; + let in_expr = in_arg.as_keyword().ok_or_else(invalid)?; + + let block_arg = call.positional_nth(2).ok_or_else(invalid)?; + let block_id = block_arg.as_block().ok_or_else(invalid)?; + let block = working_set.get_block(block_id); + + // Ensure io_reg is marked so we don't use it + builder.mark_register(io_reg)?; + + let stream_reg = builder.next_register()?; + + compile_expression( + working_set, + builder, + in_expr, + RedirectModes::capture_out(in_expr.span), + None, + stream_reg, + )?; + + // Set up loop state + let loop_ = builder.begin_loop(); + builder.set_label(loop_.continue_label, builder.here())?; + + // This gets a value from the stream each time it's executed + // io_reg basically will act as our scratch register here + builder.push( + Instruction::Iterate { + dst: io_reg, + stream: stream_reg, + end_index: loop_.break_label.0, + } + .into_spanned(call.head), + )?; + builder.add_comment("for"); + + // Put the received value in the variable + builder.push( + Instruction::StoreVariable { + var_id, + src: io_reg, + } + 
.into_spanned(var_decl_arg.span), + )?; + + // Do the body of the block + compile_block( + working_set, + builder, + block, + RedirectModes::default(), + None, + io_reg, + )?; + + // Drain the output, just like for a semicolon + builder.drain(io_reg, call.head)?; + + // Loop back to iterate to get the next value + builder.jump(loop_.continue_label, call.head)?; + + // Set the end of the loop + builder.set_label(loop_.break_label, builder.here())?; + builder.end_loop(loop_)?; + + // We don't need stream_reg anymore, after the loop + // io_reg may or may not be empty, so be sure it is + builder.free_register(stream_reg)?; + builder.mark_register(io_reg)?; + builder.load_empty(io_reg)?; + + Ok(()) +} + +/// Compile a call to `break`. +pub(crate) fn compile_break( + _working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + if builder.is_in_loop() { + builder.load_empty(io_reg)?; + builder.push_break(call.head)?; + builder.add_comment("break"); + } else { + // Fall back to calling the command if we can't find the loop target statically + builder.push( + Instruction::Call { + decl_id: call.decl_id, + src_dst: io_reg, + } + .into_spanned(call.head), + )?; + } + Ok(()) +} + +/// Compile a call to `continue`. +pub(crate) fn compile_continue( + _working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + if builder.is_in_loop() { + builder.load_empty(io_reg)?; + builder.push_continue(call.head)?; + builder.add_comment("continue"); + } else { + // Fall back to calling the command if we can't find the loop target statically + builder.push( + Instruction::Call { + decl_id: call.decl_id, + src_dst: io_reg, + } + .into_spanned(call.head), + )?; + } + Ok(()) +} + +/// Compile a call to `return` as a `return-early` instruction. 
+/// +/// This is not strictly necessary, but it is more efficient. +pub(crate) fn compile_return( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + // Pseudocode: + // + // %io_reg <- + // return-early %io_reg + if let Some(arg_expr) = call.positional_nth(0) { + compile_expression( + working_set, + builder, + arg_expr, + RedirectModes::capture_out(arg_expr.span), + None, + io_reg, + )?; + } else { + builder.load_empty(io_reg)?; + } + + // TODO: It would be nice if this could be `return` instead, but there is a little bit of + // behaviour remaining that still depends on `ShellError::Return` + builder.push(Instruction::ReturnEarly { src: io_reg }.into_spanned(call.head))?; + + // io_reg is supposed to remain allocated + builder.load_empty(io_reg)?; + + Ok(()) +} diff --git a/crates/nu-engine/src/compile/mod.rs b/crates/nu-engine/src/compile/mod.rs new file mode 100644 index 0000000000..8f6ae22682 --- /dev/null +++ b/crates/nu-engine/src/compile/mod.rs @@ -0,0 +1,204 @@ +use nu_protocol::{ + ast::{Block, Pipeline, PipelineRedirection, RedirectionSource, RedirectionTarget}, + engine::StateWorkingSet, + ir::{Instruction, IrBlock, RedirectMode}, + CompileError, IntoSpanned, RegId, Span, +}; + +mod builder; +mod call; +mod expression; +mod keyword; +mod operator; +mod redirect; + +use builder::BlockBuilder; +use call::*; +use expression::compile_expression; +use operator::*; +use redirect::*; + +const BLOCK_INPUT: RegId = RegId(0); + +/// Compile Nushell pipeline abstract syntax tree (AST) to internal representation (IR) instructions +/// for evaluation. 
pub fn compile(working_set: &StateWorkingSet, block: &Block) -> Result<IrBlock, CompileError> {
    let mut builder = BlockBuilder::new(block.span);

    // Blocks without a span fall back to the unknown span for every emitted instruction.
    let span = block.span.unwrap_or(Span::unknown());

    // The top-level block reads its input from, and leaves its result in, BLOCK_INPUT.
    // Its final pipeline uses the caller's redirection for both stdout and stderr.
    compile_block(
        working_set,
        &mut builder,
        block,
        RedirectModes::caller(span),
        Some(BLOCK_INPUT),
        BLOCK_INPUT,
    )?;

    // A complete block has to end with a `return`
    builder.push(Instruction::Return { src: BLOCK_INPUT }.into_spanned(span))?;

    builder.finish()
}

/// Compiles a [`Block`] in-place into an IR block. This can be used in a nested manner, for example
/// by [`compile_if()`], where the instructions for the blocks for the if/else are inlined into the
/// top-level IR block.
///
/// * `redirect_modes` is applied to the last pipeline only; earlier pipelines get the default.
/// * `in_reg`, if given, is handed to the first pipeline only.
/// * `out_reg` receives the output of every pipeline; it is drained and reset to empty after
///   each pipeline except the last.
///
/// For a block with no pipelines, `out_reg` is loaded empty when there is no `in_reg`, and
/// nothing is emitted otherwise.
fn compile_block(
    working_set: &StateWorkingSet,
    builder: &mut BlockBuilder,
    block: &Block,
    redirect_modes: RedirectModes,
    in_reg: Option<RegId>,
    out_reg: RegId,
) -> Result<(), CompileError> {
    let span = block.span.unwrap_or(Span::unknown());
    // Wrapped in an Option so the modes can be moved out exactly once, for the last pipeline.
    let mut redirect_modes = Some(redirect_modes);
    if !block.pipelines.is_empty() {
        let last_index = block.pipelines.len() - 1;
        for (index, pipeline) in block.pipelines.iter().enumerate() {
            compile_pipeline(
                working_set,
                builder,
                pipeline,
                span,
                // the redirect mode only applies to the last pipeline.
                if index == last_index {
                    redirect_modes
                        .take()
                        .expect("should only take redirect_modes once")
                } else {
                    RedirectModes::default()
                },
                // input is only passed to the first pipeline.
                if index == 0 { in_reg } else { None },
                out_reg,
            )?;

            if index != last_index {
                // Explicitly drain the out reg after each non-final pipeline, because that's how
                // the semicolon functions.
+ if builder.is_allocated(out_reg) { + builder.push(Instruction::Drain { src: out_reg }.into_spanned(span))?; + } + builder.load_empty(out_reg)?; + } + } + Ok(()) + } else if in_reg.is_none() { + builder.load_empty(out_reg) + } else { + Ok(()) + } +} + +fn compile_pipeline( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + pipeline: &Pipeline, + fallback_span: Span, + redirect_modes: RedirectModes, + in_reg: Option, + out_reg: RegId, +) -> Result<(), CompileError> { + let mut iter = pipeline.elements.iter().peekable(); + let mut in_reg = in_reg; + let mut redirect_modes = Some(redirect_modes); + while let Some(element) = iter.next() { + let span = element.pipe.unwrap_or(fallback_span); + + // We have to get the redirection mode from either the explicit redirection in the pipeline + // element, or from the next expression if it's specified there. If this is the last + // element, then it's from whatever is passed in as the mode to use. + + let next_redirect_modes = if let Some(next_element) = iter.peek() { + let mut modes = redirect_modes_of_expression(working_set, &next_element.expr, span)?; + + // If there's a next element with no inherent redirection we always pipe out *unless* + // this is a single redirection of stderr to pipe (e>|) + if modes.out.is_none() + && !matches!( + element.redirection, + Some(PipelineRedirection::Single { + source: RedirectionSource::Stderr, + target: RedirectionTarget::Pipe { .. 
} + }) + ) + { + let pipe_span = next_element.pipe.unwrap_or(next_element.expr.span); + modes.out = Some(RedirectMode::Pipe.into_spanned(pipe_span)); + } + + modes + } else { + redirect_modes + .take() + .expect("should only take redirect_modes once") + }; + + let spec_redirect_modes = match &element.redirection { + Some(PipelineRedirection::Single { source, target }) => { + let mode = redirection_target_to_mode(working_set, builder, target)?; + match source { + RedirectionSource::Stdout => RedirectModes { + out: Some(mode), + err: None, + }, + RedirectionSource::Stderr => RedirectModes { + out: None, + err: Some(mode), + }, + RedirectionSource::StdoutAndStderr => RedirectModes { + out: Some(mode), + err: Some(mode), + }, + } + } + Some(PipelineRedirection::Separate { out, err }) => { + // In this case, out and err must not both be Pipe + assert!( + !matches!( + (out, err), + ( + RedirectionTarget::Pipe { .. }, + RedirectionTarget::Pipe { .. } + ) + ), + "for Separate redirection, out and err targets must not both be Pipe" + ); + let out = redirection_target_to_mode(working_set, builder, out)?; + let err = redirection_target_to_mode(working_set, builder, err)?; + RedirectModes { + out: Some(out), + err: Some(err), + } + } + None => RedirectModes { + out: None, + err: None, + }, + }; + + let redirect_modes = RedirectModes { + out: spec_redirect_modes.out.or(next_redirect_modes.out), + err: spec_redirect_modes.err.or(next_redirect_modes.err), + }; + + compile_expression( + working_set, + builder, + &element.expr, + redirect_modes.clone(), + in_reg, + out_reg, + )?; + + // Clean up the redirection + finish_redirection(builder, redirect_modes, out_reg)?; + + // The next pipeline element takes input from this output + in_reg = Some(out_reg); + } + Ok(()) +} diff --git a/crates/nu-engine/src/compile/operator.rs b/crates/nu-engine/src/compile/operator.rs new file mode 100644 index 0000000000..a1ed3f66df --- /dev/null +++ b/crates/nu-engine/src/compile/operator.rs @@ -0,0 
+1,378 @@ +use nu_protocol::{ + ast::{Assignment, Boolean, CellPath, Expr, Expression, Math, Operator, PathMember}, + engine::StateWorkingSet, + ir::{Instruction, Literal}, + IntoSpanned, RegId, Span, Spanned, ENV_VARIABLE_ID, +}; +use nu_utils::IgnoreCaseExt; + +use super::{compile_expression, BlockBuilder, CompileError, RedirectModes}; + +pub(crate) fn compile_binary_op( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + lhs: &Expression, + op: Spanned, + rhs: &Expression, + span: Span, + out_reg: RegId, +) -> Result<(), CompileError> { + if let Operator::Assignment(assign_op) = op.item { + if let Some(decomposed_op) = decompose_assignment(assign_op) { + // Compiling an assignment that uses a binary op with the existing value + compile_binary_op( + working_set, + builder, + lhs, + decomposed_op.into_spanned(op.span), + rhs, + span, + out_reg, + )?; + } else { + // Compiling a plain assignment, where the current left-hand side value doesn't matter + compile_expression( + working_set, + builder, + rhs, + RedirectModes::capture_out(rhs.span), + None, + out_reg, + )?; + } + + compile_assignment(working_set, builder, lhs, op.span, out_reg)?; + + // Load out_reg with Nothing, as that's the result of an assignment + builder.load_literal(out_reg, Literal::Nothing.into_spanned(op.span)) + } else { + // Not an assignment: just do the binary op + let lhs_reg = out_reg; + + compile_expression( + working_set, + builder, + lhs, + RedirectModes::capture_out(lhs.span), + None, + lhs_reg, + )?; + + match op.item { + // `and` / `or` are short-circuiting, and we can get by with one register and a branch + Operator::Boolean(Boolean::And) => { + let true_label = builder.label(None); + builder.branch_if(lhs_reg, true_label, op.span)?; + + // If the branch was not taken it's false, so short circuit to load false + let false_label = builder.label(None); + builder.jump(false_label, op.span)?; + + builder.set_label(true_label, builder.here())?; + compile_expression( + 
working_set, + builder, + rhs, + RedirectModes::capture_out(rhs.span), + None, + lhs_reg, + )?; + + let end_label = builder.label(None); + builder.jump(end_label, op.span)?; + + // Consumed by `branch-if`, so we have to set it false again + builder.set_label(false_label, builder.here())?; + builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?; + + builder.set_label(end_label, builder.here())?; + } + Operator::Boolean(Boolean::Or) => { + let true_label = builder.label(None); + builder.branch_if(lhs_reg, true_label, op.span)?; + + // If the branch was not taken it's false, so do the right-side expression + compile_expression( + working_set, + builder, + rhs, + RedirectModes::capture_out(rhs.span), + None, + lhs_reg, + )?; + + let end_label = builder.label(None); + builder.jump(end_label, op.span)?; + + // Consumed by `branch-if`, so we have to set it true again + builder.set_label(true_label, builder.here())?; + builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?; + + builder.set_label(end_label, builder.here())?; + } + _ => { + // Any other operator, via `binary-op` + let rhs_reg = builder.next_register()?; + + compile_expression( + working_set, + builder, + rhs, + RedirectModes::capture_out(rhs.span), + None, + rhs_reg, + )?; + + builder.push( + Instruction::BinaryOp { + lhs_dst: lhs_reg, + op: op.item, + rhs: rhs_reg, + } + .into_spanned(op.span), + )?; + } + } + + if lhs_reg != out_reg { + builder.push( + Instruction::Move { + dst: out_reg, + src: lhs_reg, + } + .into_spanned(op.span), + )?; + } + + builder.push(Instruction::Span { src_dst: out_reg }.into_spanned(span))?; + + Ok(()) + } +} + +/// The equivalent plain operator to use for an assignment, if any +pub(crate) fn decompose_assignment(assignment: Assignment) -> Option { + match assignment { + Assignment::Assign => None, + Assignment::PlusAssign => Some(Operator::Math(Math::Plus)), + Assignment::AppendAssign => Some(Operator::Math(Math::Append)), + 
Assignment::MinusAssign => Some(Operator::Math(Math::Minus)), + Assignment::MultiplyAssign => Some(Operator::Math(Math::Multiply)), + Assignment::DivideAssign => Some(Operator::Math(Math::Divide)), + } +} + +/// Compile assignment of the value in a register to a left-hand expression +pub(crate) fn compile_assignment( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + lhs: &Expression, + assignment_span: Span, + rhs_reg: RegId, +) -> Result<(), CompileError> { + match lhs.expr { + Expr::Var(var_id) => { + // Double check that the variable is supposed to be mutable + if !working_set.get_variable(var_id).mutable { + return Err(CompileError::AssignmentRequiresMutableVar { span: lhs.span }); + } + + builder.push( + Instruction::StoreVariable { + var_id, + src: rhs_reg, + } + .into_spanned(assignment_span), + )?; + Ok(()) + } + Expr::FullCellPath(ref path) => match (&path.head, &path.tail) { + ( + Expression { + expr: Expr::Var(var_id), + .. + }, + _, + ) if *var_id == ENV_VARIABLE_ID => { + // This will be an assignment to an environment variable. + let Some(PathMember::String { val: key, .. }) = path.tail.first() else { + return Err(CompileError::CannotReplaceEnv { span: lhs.span }); + }; + + // Some env vars can't be set by Nushell code. + const AUTOMATIC_NAMES: &[&str] = &["PWD", "FILE_PWD", "CURRENT_FILE"]; + if AUTOMATIC_NAMES.iter().any(|name| key.eq_ignore_case(name)) { + return Err(CompileError::AutomaticEnvVarSetManually { + envvar_name: "PWD".into(), + span: lhs.span, + }); + } + + let key_data = builder.data(key)?; + + let val_reg = if path.tail.len() > 1 { + // Get the current value of the head and first tail of the path, from env + let head_reg = builder.next_register()?; + + // We could use compile_load_env, but this shares the key data... 
+ // Always use optional, because it doesn't matter if it's already there + builder.push( + Instruction::LoadEnvOpt { + dst: head_reg, + key: key_data, + } + .into_spanned(lhs.span), + )?; + + // Default to empty record so we can do further upserts + let default_label = builder.label(None); + let upsert_label = builder.label(None); + builder.branch_if_empty(head_reg, default_label, assignment_span)?; + builder.jump(upsert_label, assignment_span)?; + + builder.set_label(default_label, builder.here())?; + builder.load_literal( + head_reg, + Literal::Record { capacity: 0 }.into_spanned(lhs.span), + )?; + + // Do the upsert on the current value to incorporate rhs + builder.set_label(upsert_label, builder.here())?; + compile_upsert_cell_path( + builder, + (&path.tail[1..]).into_spanned(lhs.span), + head_reg, + rhs_reg, + assignment_span, + )?; + + head_reg + } else { + // Path has only one tail, so we don't need the current value to do an upsert, + // just set it directly to rhs + rhs_reg + }; + + // Finally, store the modified env variable + builder.push( + Instruction::StoreEnv { + key: key_data, + src: val_reg, + } + .into_spanned(assignment_span), + )?; + Ok(()) + } + (_, tail) if tail.is_empty() => { + // If the path tail is empty, we can really just treat this as if it were an + // assignment to the head + compile_assignment(working_set, builder, &path.head, assignment_span, rhs_reg) + } + _ => { + // Just a normal assignment to some path + let head_reg = builder.next_register()?; + + // Compile getting current value of the head expression + compile_expression( + working_set, + builder, + &path.head, + RedirectModes::capture_out(path.head.span), + None, + head_reg, + )?; + + // Upsert the tail of the path into the old value of the head expression + compile_upsert_cell_path( + builder, + path.tail.as_slice().into_spanned(lhs.span), + head_reg, + rhs_reg, + assignment_span, + )?; + + // Now compile the assignment of the updated value to the head + 
compile_assignment(working_set, builder, &path.head, assignment_span, head_reg) + } + }, + Expr::Garbage => Err(CompileError::Garbage { span: lhs.span }), + _ => Err(CompileError::AssignmentRequiresVar { span: lhs.span }), + } +} + +/// Compile an upsert-cell-path instruction, with known literal members +pub(crate) fn compile_upsert_cell_path( + builder: &mut BlockBuilder, + members: Spanned<&[PathMember]>, + src_dst: RegId, + new_value: RegId, + span: Span, +) -> Result<(), CompileError> { + let path_reg = builder.literal( + Literal::CellPath( + CellPath { + members: members.item.to_vec(), + } + .into(), + ) + .into_spanned(members.span), + )?; + builder.push( + Instruction::UpsertCellPath { + src_dst, + path: path_reg, + new_value, + } + .into_spanned(span), + )?; + Ok(()) +} + +/// Compile the correct sequence to get an environment variable + follow a path on it +pub(crate) fn compile_load_env( + builder: &mut BlockBuilder, + span: Span, + path: &[PathMember], + out_reg: RegId, +) -> Result<(), CompileError> { + if path.is_empty() { + builder.push( + Instruction::LoadVariable { + dst: out_reg, + var_id: ENV_VARIABLE_ID, + } + .into_spanned(span), + )?; + } else { + let (key, optional) = match &path[0] { + PathMember::String { val, optional, .. } => (builder.data(val)?, *optional), + PathMember::Int { span, .. 
} => { + return Err(CompileError::AccessEnvByInt { span: *span }) + } + }; + let tail = &path[1..]; + + if optional { + builder.push(Instruction::LoadEnvOpt { dst: out_reg, key }.into_spanned(span))?; + } else { + builder.push(Instruction::LoadEnv { dst: out_reg, key }.into_spanned(span))?; + } + + if !tail.is_empty() { + let path = builder.literal( + Literal::CellPath(Box::new(CellPath { + members: tail.to_vec(), + })) + .into_spanned(span), + )?; + builder.push( + Instruction::FollowCellPath { + src_dst: out_reg, + path, + } + .into_spanned(span), + )?; + } + } + Ok(()) +} diff --git a/crates/nu-engine/src/compile/redirect.rs b/crates/nu-engine/src/compile/redirect.rs new file mode 100644 index 0000000000..15af1a9f8c --- /dev/null +++ b/crates/nu-engine/src/compile/redirect.rs @@ -0,0 +1,157 @@ +use nu_protocol::{ + ast::{Expression, RedirectionTarget}, + engine::StateWorkingSet, + ir::{Instruction, RedirectMode}, + IntoSpanned, OutDest, RegId, Span, Spanned, +}; + +use super::{compile_expression, BlockBuilder, CompileError}; + +#[derive(Default, Clone)] +pub(crate) struct RedirectModes { + pub(crate) out: Option>, + pub(crate) err: Option>, +} + +impl RedirectModes { + pub(crate) fn capture_out(span: Span) -> Self { + RedirectModes { + out: Some(RedirectMode::Capture.into_spanned(span)), + err: None, + } + } + + pub(crate) fn caller(span: Span) -> RedirectModes { + RedirectModes { + out: Some(RedirectMode::Caller.into_spanned(span)), + err: Some(RedirectMode::Caller.into_spanned(span)), + } + } +} + +pub(crate) fn redirection_target_to_mode( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + target: &RedirectionTarget, +) -> Result, CompileError> { + Ok(match target { + RedirectionTarget::File { + expr, + append, + span: redir_span, + } => { + let file_num = builder.next_file_num()?; + let path_reg = builder.next_register()?; + compile_expression( + working_set, + builder, + expr, + RedirectModes::capture_out(*redir_span), + None, + path_reg, + )?; 
+ builder.push( + Instruction::OpenFile { + file_num, + path: path_reg, + append: *append, + } + .into_spanned(*redir_span), + )?; + RedirectMode::File { file_num }.into_spanned(*redir_span) + } + RedirectionTarget::Pipe { span } => RedirectMode::Pipe.into_spanned(*span), + }) +} + +pub(crate) fn redirect_modes_of_expression( + working_set: &StateWorkingSet, + expression: &Expression, + redir_span: Span, +) -> Result { + let (out, err) = expression.expr.pipe_redirection(working_set); + Ok(RedirectModes { + out: out + .map(|r| r.into_spanned(redir_span)) + .map(out_dest_to_redirect_mode) + .transpose()?, + err: err + .map(|r| r.into_spanned(redir_span)) + .map(out_dest_to_redirect_mode) + .transpose()?, + }) +} + +/// Finish the redirection for an expression, writing to and closing files as necessary +pub(crate) fn finish_redirection( + builder: &mut BlockBuilder, + modes: RedirectModes, + out_reg: RegId, +) -> Result<(), CompileError> { + if let Some(Spanned { + item: RedirectMode::File { file_num }, + span, + }) = modes.out + { + // If out is a file and err is a pipe, we must not consume the expression result - + // that is actually the err, in that case. + if !matches!( + modes.err, + Some(Spanned { + item: RedirectMode::Pipe { .. }, + .. 
+ }) + ) { + builder.push( + Instruction::WriteFile { + file_num, + src: out_reg, + } + .into_spanned(span), + )?; + builder.load_empty(out_reg)?; + } + builder.push(Instruction::CloseFile { file_num }.into_spanned(span))?; + } + + match modes.err { + Some(Spanned { + item: RedirectMode::File { file_num }, + span, + }) => { + // Close the file, unless it's the same as out (in which case it was already closed) + if !modes.out.is_some_and(|out_mode| match out_mode.item { + RedirectMode::File { + file_num: out_file_num, + } => file_num == out_file_num, + _ => false, + }) { + builder.push(Instruction::CloseFile { file_num }.into_spanned(span))?; + } + } + Some(Spanned { + item: RedirectMode::Pipe, + span, + }) => { + builder.push(Instruction::CheckErrRedirected { src: out_reg }.into_spanned(span))?; + } + _ => (), + } + + Ok(()) +} + +pub(crate) fn out_dest_to_redirect_mode( + out_dest: Spanned, +) -> Result, CompileError> { + let span = out_dest.span; + out_dest + .map(|out_dest| match out_dest { + OutDest::Pipe => Ok(RedirectMode::Pipe), + OutDest::Capture => Ok(RedirectMode::Capture), + OutDest::Null => Ok(RedirectMode::Null), + OutDest::Inherit => Ok(RedirectMode::Inherit), + OutDest::File(_) => Err(CompileError::InvalidRedirectMode { span }), + }) + .transpose() +} diff --git a/crates/nu-engine/src/documentation.rs b/crates/nu-engine/src/documentation.rs index a7d4950036..7840d03c47 100644 --- a/crates/nu-engine/src/documentation.rs +++ b/crates/nu-engine/src/documentation.rs @@ -45,10 +45,12 @@ fn nu_highlight_string(code_string: &str, engine_state: &EngineState, stack: &mu if let Some(highlighter) = engine_state.find_decl(b"nu-highlight", &[]) { let decl = engine_state.get_decl(highlighter); + let call = Call::new(Span::unknown()); + if let Ok(output) = decl.run( engine_state, stack, - &Call::new(Span::unknown()), + &(&call).into(), Value::string(code_string, Span::unknown()).into_pipeline_data(), ) { let result = output.into_value(Span::unknown()); @@ -269,11 
+271,12 @@ fn get_documentation( let _ = write!(long_desc, "\n > {}\n", example.example); } else if let Some(highlighter) = engine_state.find_decl(b"nu-highlight", &[]) { let decl = engine_state.get_decl(highlighter); + let call = Call::new(Span::unknown()); match decl.run( engine_state, stack, - &Call::new(Span::unknown()), + &(&call).into(), Value::string(example.example, Span::unknown()).into_pipeline_data(), ) { Ok(output) => { @@ -326,7 +329,7 @@ fn get_documentation( .run( engine_state, stack, - &table_call, + &(&table_call).into(), PipelineData::Value(result.clone(), None), ) .ok() diff --git a/crates/nu-engine/src/env.rs b/crates/nu-engine/src/env.rs index 048d9bfb99..ab3a4bc50c 100644 --- a/crates/nu-engine/src/env.rs +++ b/crates/nu-engine/src/env.rs @@ -1,8 +1,8 @@ use crate::ClosureEvalOnce; use nu_path::canonicalize_with; use nu_protocol::{ - ast::{Call, Expr}, - engine::{EngineState, Stack, StateWorkingSet}, + ast::Expr, + engine::{Call, EngineState, Stack, StateWorkingSet}, Config, ShellError, Span, Value, VarId, }; use std::{ @@ -244,14 +244,15 @@ pub fn path_str( } pub const DIR_VAR_PARSER_INFO: &str = "dirs_var"; -pub fn get_dirs_var_from_call(call: &Call) -> Option { - call.get_parser_info(DIR_VAR_PARSER_INFO).and_then(|x| { - if let Expr::Var(id) = x.expr { - Some(id) - } else { - None - } - }) +pub fn get_dirs_var_from_call(stack: &Stack, call: &Call) -> Option { + call.get_parser_info(stack, DIR_VAR_PARSER_INFO) + .and_then(|x| { + if let Expr::Var(id) = x.expr { + Some(id) + } else { + None + } + }) } /// This helper function is used to find files during eval diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index b495589011..6e171eb46c 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -1,3 +1,4 @@ +use crate::eval_ir_block; #[allow(deprecated)] use crate::{current_dir, get_config, get_full_help}; use nu_path::{expand_path_with, AbsolutePathBuf}; @@ -7,7 +8,7 @@ use nu_protocol::{ 
PipelineRedirection, RedirectionSource, RedirectionTarget, }, debugger::DebugContext, - engine::{Closure, EngineState, Redirection, Stack}, + engine::{Closure, EngineState, Redirection, Stack, StateWorkingSet}, eval_base::Eval, ByteStreamSource, Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span, Spanned, Type, Value, VarId, ENV_VARIABLE_ID, @@ -174,7 +175,7 @@ pub fn eval_call( // We pass caller_stack here with the knowledge that internal commands // are going to be specifically looking for global state in the stack // rather than any local state. - decl.run(engine_state, caller_stack, call, input) + decl.run(engine_state, caller_stack, &call.into(), input) } } @@ -223,7 +224,7 @@ fn eval_external( } } - command.run(engine_state, stack, &call, input) + command.run(engine_state, stack, &(&call).into(), input) } pub fn eval_expression( @@ -507,6 +508,11 @@ pub fn eval_block( block: &Block, mut input: PipelineData, ) -> Result { + // Remove once IR is the default. + if stack.use_ir { + return eval_ir_block::(engine_state, stack, block, input); + } + D::enter_block(engine_state, block); let num_pipelines = block.len(); @@ -521,7 +527,7 @@ pub fn eval_block( for (i, element) in elements.iter().enumerate() { let next = elements.get(i + 1).unwrap_or(last); - let (next_out, next_err) = next.pipe_redirection(engine_state); + let (next_out, next_err) = next.pipe_redirection(&StateWorkingSet::new(engine_state)); let (stdout, stderr) = eval_element_redirection::( engine_state, stack, @@ -903,7 +909,7 @@ impl Eval for EvalRuntime { /// /// An automatic environment variable cannot be assigned to by user code. 
/// Current there are three of them: $env.PWD, $env.FILE_PWD, $env.CURRENT_FILE -fn is_automatic_env_var(var: &str) -> bool { +pub(crate) fn is_automatic_env_var(var: &str) -> bool { let names = ["PWD", "FILE_PWD", "CURRENT_FILE"]; names.iter().any(|&name| { if cfg!(windows) { diff --git a/crates/nu-engine/src/eval_helpers.rs b/crates/nu-engine/src/eval_helpers.rs index 66bda3e0eb..65ebc6b61d 100644 --- a/crates/nu-engine/src/eval_helpers.rs +++ b/crates/nu-engine/src/eval_helpers.rs @@ -1,6 +1,6 @@ use crate::{ eval_block, eval_block_with_early_return, eval_expression, eval_expression_with_input, - eval_subexpression, + eval_ir_block, eval_subexpression, }; use nu_protocol::{ ast::{Block, Expression}, @@ -13,6 +13,10 @@ use nu_protocol::{ pub type EvalBlockFn = fn(&EngineState, &mut Stack, &Block, PipelineData) -> Result; +/// Type of eval_ir_block() function +pub type EvalIrBlockFn = + fn(&EngineState, &mut Stack, &Block, PipelineData) -> Result; + /// Type of eval_block_with_early_return() function pub type EvalBlockWithEarlyReturnFn = fn(&EngineState, &mut Stack, &Block, PipelineData) -> Result; @@ -42,6 +46,16 @@ pub fn get_eval_block(engine_state: &EngineState) -> EvalBlockFn { } } +/// Helper function to fetch `eval_ir_block()` with the correct type parameter based on whether +/// engine_state is configured with or without a debugger. +pub fn get_eval_ir_block(engine_state: &EngineState) -> EvalIrBlockFn { + if engine_state.is_debugging() { + eval_ir_block:: + } else { + eval_ir_block:: + } +} + /// Helper function to fetch `eval_block_with_early_return()` with the correct type parameter based /// on whether engine_state is configured with or without a debugger. 
pub fn get_eval_block_with_early_return(engine_state: &EngineState) -> EvalBlockWithEarlyReturnFn { diff --git a/crates/nu-engine/src/eval_ir.rs b/crates/nu-engine/src/eval_ir.rs new file mode 100644 index 0000000000..a505c9be34 --- /dev/null +++ b/crates/nu-engine/src/eval_ir.rs @@ -0,0 +1,1462 @@ +use std::{borrow::Cow, fs::File, sync::Arc}; + +use nu_path::{expand_path_with, AbsolutePathBuf}; +use nu_protocol::{ + ast::{Bits, Block, Boolean, CellPath, Comparison, Math, Operator}, + debugger::DebugContext, + engine::{Argument, Closure, EngineState, ErrorHandler, Matcher, Redirection, Stack}, + ir::{Call, DataSlice, Instruction, IrAstRef, IrBlock, Literal, RedirectMode}, + record, ByteStreamSource, DeclId, ErrSpan, Flag, IntoPipelineData, IntoSpanned, ListStream, + OutDest, PipelineData, PositionalArg, Range, Record, RegId, ShellError, Signals, Signature, + Span, Spanned, Type, Value, VarId, ENV_VARIABLE_ID, +}; +use nu_utils::IgnoreCaseExt; + +use crate::{eval::is_automatic_env_var, eval_block_with_early_return}; + +/// Evaluate the compiled representation of a [`Block`]. +pub fn eval_ir_block( + engine_state: &EngineState, + stack: &mut Stack, + block: &Block, + input: PipelineData, +) -> Result { + // Rust does not check recursion limits outside of const evaluation. + // But nu programs run in the same process as the shell. + // To prevent a stack overflow in user code from crashing the shell, + // we limit the recursion depth of function calls. + let maximum_call_stack_depth: u64 = engine_state.config.recursion_limit as u64; + if stack.recursion_count > maximum_call_stack_depth { + return Err(ShellError::RecursionLimitReached { + recursion_limit: maximum_call_stack_depth, + span: block.span, + }); + } + + if let Some(ir_block) = &block.ir_block { + D::enter_block(engine_state, block); + + let args_base = stack.arguments.get_base(); + let error_handler_base = stack.error_handlers.get_base(); + + // Allocate and initialize registers. 
I've found that it's not really worth trying to avoid + // the heap allocation here by reusing buffers - our allocator is fast enough + let mut registers = Vec::with_capacity(ir_block.register_count as usize); + for _ in 0..ir_block.register_count { + registers.push(PipelineData::Empty); + } + + // Initialize file storage. + let mut files = vec![None; ir_block.file_count as usize]; + + let result = eval_ir_block_impl::( + &mut EvalContext { + engine_state, + stack, + data: &ir_block.data, + block_span: &block.span, + args_base, + error_handler_base, + redirect_out: None, + redirect_err: None, + matches: vec![], + registers: &mut registers[..], + files: &mut files[..], + }, + ir_block, + input, + ); + + stack.error_handlers.leave_frame(error_handler_base); + stack.arguments.leave_frame(args_base); + + D::leave_block(engine_state, block); + + result + } else { + // FIXME blocks having IR should not be optional + Err(ShellError::GenericError { + error: "Can't evaluate block in IR mode".into(), + msg: "block is missing compiled representation".into(), + span: block.span, + help: Some("the IrBlock is probably missing due to a compilation error".into()), + inner: vec![], + }) + } +} + +/// All of the pointers necessary for evaluation +struct EvalContext<'a> { + engine_state: &'a EngineState, + stack: &'a mut Stack, + data: &'a Arc<[u8]>, + /// The span of the block + block_span: &'a Option, + /// Base index on the argument stack to reset to after a call + args_base: usize, + /// Base index on the error handler stack to reset to after a call + error_handler_base: usize, + /// State set by redirect-out + redirect_out: Option, + /// State set by redirect-err + redirect_err: Option, + /// Scratch space to use for `match` + matches: Vec<(VarId, Value)>, + /// Intermediate pipeline data storage used by instructions, indexed by RegId + registers: &'a mut [PipelineData], + /// Holds open files used by redirections + files: &'a mut [Option>], +} + +impl<'a> EvalContext<'a> { + 
/// Replace the contents of a register with a new value + #[inline] + fn put_reg(&mut self, reg_id: RegId, new_value: PipelineData) { + // log::trace!("{reg_id} <- {new_value:?}"); + self.registers[reg_id.0 as usize] = new_value; + } + + /// Borrow the contents of a register. + #[inline] + fn borrow_reg(&self, reg_id: RegId) -> &PipelineData { + &self.registers[reg_id.0 as usize] + } + + /// Replace the contents of a register with `Empty` and then return the value that it contained + #[inline] + fn take_reg(&mut self, reg_id: RegId) -> PipelineData { + // log::trace!("<- {reg_id}"); + std::mem::replace(&mut self.registers[reg_id.0 as usize], PipelineData::Empty) + } + + /// Clone data from a register. Must be collected first. + fn clone_reg(&mut self, reg_id: RegId, error_span: Span) -> Result { + match &self.registers[reg_id.0 as usize] { + PipelineData::Empty => Ok(PipelineData::Empty), + PipelineData::Value(val, meta) => Ok(PipelineData::Value(val.clone(), meta.clone())), + _ => Err(ShellError::IrEvalError { + msg: "Must collect to value before using instruction that clones from a register" + .into(), + span: Some(error_span), + }), + } + } + + /// Clone a value from a register. Must be collected first. + fn clone_reg_value(&mut self, reg_id: RegId, fallback_span: Span) -> Result { + match self.clone_reg(reg_id, fallback_span)? 
{ + PipelineData::Empty => Ok(Value::nothing(fallback_span)), + PipelineData::Value(val, _) => Ok(val), + _ => unreachable!("clone_reg should never return stream data"), + } + } + + /// Take and implicitly collect a register to a value + fn collect_reg(&mut self, reg_id: RegId, fallback_span: Span) -> Result { + let data = self.take_reg(reg_id); + let span = data.span().unwrap_or(fallback_span); + data.into_value(span) + } + + /// Get a string from data or produce evaluation error if it's invalid UTF-8 + fn get_str(&self, slice: DataSlice, error_span: Span) -> Result<&'a str, ShellError> { + std::str::from_utf8(&self.data[slice]).map_err(|_| ShellError::IrEvalError { + msg: format!("data slice does not refer to valid UTF-8: {slice:?}"), + span: Some(error_span), + }) + } +} + +/// Eval an IR block on the provided slice of registers. +fn eval_ir_block_impl( + ctx: &mut EvalContext<'_>, + ir_block: &IrBlock, + input: PipelineData, +) -> Result { + if !ctx.registers.is_empty() { + ctx.registers[0] = input; + } + + // Program counter, starts at zero. 
+ let mut pc = 0; + + while pc < ir_block.instructions.len() { + let instruction = &ir_block.instructions[pc]; + let span = &ir_block.spans[pc]; + let ast = &ir_block.ast[pc]; + log::trace!( + "{pc:-4}: {}", + instruction.display(ctx.engine_state, ctx.data) + ); + match eval_instruction::(ctx, instruction, span, ast) { + Ok(InstructionResult::Continue) => { + pc += 1; + } + Ok(InstructionResult::Branch(next_pc)) => { + pc = next_pc; + } + Ok(InstructionResult::Return(reg_id)) => { + return Ok(ctx.take_reg(reg_id)); + } + Ok(InstructionResult::ExitCode(exit_code)) => { + if let Some(error_handler) = ctx.stack.error_handlers.pop(ctx.error_handler_base) { + // If an error handler is set, branch there + prepare_error_handler(ctx, error_handler, None); + pc = error_handler.handler_index; + } else { + // If not, exit the block with the exit code + return Ok(PipelineData::new_external_stream_with_only_exit_code( + exit_code, + )); + } + } + Err( + err @ (ShellError::Return { .. } + | ShellError::Continue { .. } + | ShellError::Break { .. }), + ) => { + // These block control related errors should be passed through + return Err(err); + } + Err(err) => { + if let Some(error_handler) = ctx.stack.error_handlers.pop(ctx.error_handler_base) { + // If an error handler is set, branch there + prepare_error_handler(ctx, error_handler, Some(err.into_spanned(*span))); + pc = error_handler.handler_index; + } else { + // If not, exit the block with the error + return Err(err); + } + } + } + } + + // Fell out of the loop, without encountering a Return. 
+ Err(ShellError::IrEvalError { + msg: format!( + "Program counter out of range (pc={pc}, len={len})", + len = ir_block.instructions.len(), + ), + span: *ctx.block_span, + }) +} + +/// Prepare the context for an error handler +fn prepare_error_handler( + ctx: &mut EvalContext<'_>, + error_handler: ErrorHandler, + error: Option>, +) { + if let Some(reg_id) = error_handler.error_register { + if let Some(error) = error { + // Create the error value and put it in the register + let value = Value::record( + record! { + "msg" => Value::string(format!("{}", error.item), error.span), + "debug" => Value::string(format!("{:?}", error.item), error.span), + "raw" => Value::error(error.item, error.span), + }, + error.span, + ); + ctx.put_reg(reg_id, PipelineData::Value(value, None)); + } else { + // Set the register to empty + ctx.put_reg(reg_id, PipelineData::Empty); + } + } +} + +/// The result of performing an instruction. Describes what should happen next +#[derive(Debug)] +enum InstructionResult { + Continue, + Branch(usize), + Return(RegId), + ExitCode(i32), +} + +/// Perform an instruction +fn eval_instruction( + ctx: &mut EvalContext<'_>, + instruction: &Instruction, + span: &Span, + ast: &Option, +) -> Result { + use self::InstructionResult::*; + + // See the docs for `Instruction` for more information on what these instructions are supposed + // to do. 
+ match instruction { + Instruction::Unreachable => Err(ShellError::IrEvalError { + msg: "Reached unreachable code".into(), + span: Some(*span), + }), + Instruction::LoadLiteral { dst, lit } => load_literal(ctx, *dst, lit, *span), + Instruction::LoadValue { dst, val } => { + ctx.put_reg(*dst, Value::clone(val).into_pipeline_data()); + Ok(Continue) + } + Instruction::Move { dst, src } => { + let val = ctx.take_reg(*src); + ctx.put_reg(*dst, val); + Ok(Continue) + } + Instruction::Clone { dst, src } => { + let data = ctx.clone_reg(*src, *span)?; + ctx.put_reg(*dst, data); + Ok(Continue) + } + Instruction::Collect { src_dst } => { + let data = ctx.take_reg(*src_dst); + let value = collect(data, *span)?; + ctx.put_reg(*src_dst, value); + Ok(Continue) + } + Instruction::Span { src_dst } => { + let data = ctx.take_reg(*src_dst); + let spanned = data.with_span(*span); + ctx.put_reg(*src_dst, spanned); + Ok(Continue) + } + Instruction::Drop { src } => { + ctx.take_reg(*src); + Ok(Continue) + } + Instruction::Drain { src } => { + let data = ctx.take_reg(*src); + drain(ctx, data) + } + Instruction::LoadVariable { dst, var_id } => { + let value = get_var(ctx, *var_id, *span)?; + ctx.put_reg(*dst, value.into_pipeline_data()); + Ok(Continue) + } + Instruction::StoreVariable { var_id, src } => { + let value = ctx.collect_reg(*src, *span)?; + ctx.stack.add_var(*var_id, value); + Ok(Continue) + } + Instruction::LoadEnv { dst, key } => { + let key = ctx.get_str(*key, *span)?; + if let Some(value) = get_env_var_case_insensitive(ctx, key) { + let new_value = value.clone().into_pipeline_data(); + ctx.put_reg(*dst, new_value); + Ok(Continue) + } else { + // FIXME: using the same span twice, shouldn't this really be + // EnvVarNotFoundAtRuntime? There are tests that depend on CantFindColumn though... 
+ Err(ShellError::CantFindColumn { + col_name: key.into(), + span: Some(*span), + src_span: *span, + }) + } + } + Instruction::LoadEnvOpt { dst, key } => { + let key = ctx.get_str(*key, *span)?; + let value = get_env_var_case_insensitive(ctx, key) + .cloned() + .unwrap_or(Value::nothing(*span)); + ctx.put_reg(*dst, value.into_pipeline_data()); + Ok(Continue) + } + Instruction::StoreEnv { key, src } => { + let key = ctx.get_str(*key, *span)?; + let value = ctx.collect_reg(*src, *span)?; + + let key = get_env_var_name_case_insensitive(ctx, key); + + if !is_automatic_env_var(&key) { + ctx.stack.add_env_var(key.into_owned(), value); + Ok(Continue) + } else { + Err(ShellError::AutomaticEnvVarSetManually { + envvar_name: key.into(), + span: *span, + }) + } + } + Instruction::PushPositional { src } => { + let val = ctx.collect_reg(*src, *span)?.with_span(*span); + ctx.stack.arguments.push(Argument::Positional { + span: *span, + val, + ast: ast.clone().map(|ast_ref| ast_ref.0), + }); + Ok(Continue) + } + Instruction::AppendRest { src } => { + let vals = ctx.collect_reg(*src, *span)?.with_span(*span); + ctx.stack.arguments.push(Argument::Spread { + span: *span, + vals, + ast: ast.clone().map(|ast_ref| ast_ref.0), + }); + Ok(Continue) + } + Instruction::PushFlag { name } => { + let data = ctx.data.clone(); + ctx.stack.arguments.push(Argument::Flag { + data, + name: *name, + short: DataSlice::empty(), + span: *span, + }); + Ok(Continue) + } + Instruction::PushShortFlag { short } => { + let data = ctx.data.clone(); + ctx.stack.arguments.push(Argument::Flag { + data, + name: DataSlice::empty(), + short: *short, + span: *span, + }); + Ok(Continue) + } + Instruction::PushNamed { name, src } => { + let val = ctx.collect_reg(*src, *span)?.with_span(*span); + let data = ctx.data.clone(); + ctx.stack.arguments.push(Argument::Named { + data, + name: *name, + short: DataSlice::empty(), + span: *span, + val, + ast: ast.clone().map(|ast_ref| ast_ref.0), + }); + Ok(Continue) + } + 
Instruction::PushShortNamed { short, src } => { + let val = ctx.collect_reg(*src, *span)?.with_span(*span); + let data = ctx.data.clone(); + ctx.stack.arguments.push(Argument::Named { + data, + name: DataSlice::empty(), + short: *short, + span: *span, + val, + ast: ast.clone().map(|ast_ref| ast_ref.0), + }); + Ok(Continue) + } + Instruction::PushParserInfo { name, info } => { + let data = ctx.data.clone(); + ctx.stack.arguments.push(Argument::ParserInfo { + data, + name: *name, + info: info.clone(), + }); + Ok(Continue) + } + Instruction::RedirectOut { mode } => { + ctx.redirect_out = eval_redirection(ctx, mode, *span, RedirectionStream::Out)?; + Ok(Continue) + } + Instruction::RedirectErr { mode } => { + ctx.redirect_err = eval_redirection(ctx, mode, *span, RedirectionStream::Err)?; + Ok(Continue) + } + Instruction::CheckErrRedirected { src } => match ctx.borrow_reg(*src) { + PipelineData::ByteStream(stream, _) + if matches!(stream.source(), ByteStreamSource::Child(_)) => + { + Ok(Continue) + } + _ => Err(ShellError::GenericError { + error: "Can't redirect stderr of internal command output".into(), + msg: "piping stderr only works on external commands".into(), + span: Some(*span), + help: None, + inner: vec![], + }), + }, + Instruction::OpenFile { + file_num, + path, + append, + } => { + let path = ctx.collect_reg(*path, *span)?; + let file = open_file(ctx, &path, *append)?; + ctx.files[*file_num as usize] = Some(file); + Ok(Continue) + } + Instruction::WriteFile { file_num, src } => { + let src = ctx.take_reg(*src); + let file = ctx + .files + .get(*file_num as usize) + .cloned() + .flatten() + .ok_or_else(|| ShellError::IrEvalError { + msg: format!("Tried to write to file #{file_num}, but it is not open"), + span: Some(*span), + })?; + let result = { + let mut stack = ctx + .stack + .push_redirection(Some(Redirection::File(file)), None); + src.write_to_out_dests(ctx.engine_state, &mut stack)? 
+ }; + // Abort execution if there's an exit code from a failed external + drain(ctx, result) + } + Instruction::CloseFile { file_num } => { + if ctx.files[*file_num as usize].take().is_some() { + Ok(Continue) + } else { + Err(ShellError::IrEvalError { + msg: format!("Tried to close file #{file_num}, but it is not open"), + span: Some(*span), + }) + } + } + Instruction::Call { decl_id, src_dst } => { + let input = ctx.take_reg(*src_dst); + let result = eval_call::(ctx, *decl_id, *span, input)?; + ctx.put_reg(*src_dst, result); + Ok(Continue) + } + Instruction::StringAppend { src_dst, val } => { + let string_value = ctx.collect_reg(*src_dst, *span)?; + let operand_value = ctx.collect_reg(*val, *span)?; + let string_span = string_value.span(); + + let mut string = string_value.into_string()?; + let operand = if let Value::String { val, .. } = operand_value { + // Small optimization, so we don't have to copy the string *again* + val + } else { + operand_value.to_expanded_string(", ", ctx.engine_state.get_config()) + }; + string.push_str(&operand); + + let new_string_value = Value::string(string, string_span); + ctx.put_reg(*src_dst, new_string_value.into_pipeline_data()); + Ok(Continue) + } + Instruction::GlobFrom { src_dst, no_expand } => { + let string_value = ctx.collect_reg(*src_dst, *span)?; + let glob_value = if matches!(string_value, Value::Glob { .. }) { + // It already is a glob, so don't touch it. 
+ string_value + } else { + // Treat it as a string, then cast + let string = string_value.into_string()?; + Value::glob(string, *no_expand, *span) + }; + ctx.put_reg(*src_dst, glob_value.into_pipeline_data()); + Ok(Continue) + } + Instruction::ListPush { src_dst, item } => { + let list_value = ctx.collect_reg(*src_dst, *span)?; + let item = ctx.collect_reg(*item, *span)?; + let list_span = list_value.span(); + let mut list = list_value.into_list()?; + list.push(item); + ctx.put_reg(*src_dst, Value::list(list, list_span).into_pipeline_data()); + Ok(Continue) + } + Instruction::ListSpread { src_dst, items } => { + let list_value = ctx.collect_reg(*src_dst, *span)?; + let items = ctx.collect_reg(*items, *span)?; + let list_span = list_value.span(); + let items_span = items.span(); + let mut list = list_value.into_list()?; + list.extend( + items + .into_list() + .map_err(|_| ShellError::CannotSpreadAsList { span: items_span })?, + ); + ctx.put_reg(*src_dst, Value::list(list, list_span).into_pipeline_data()); + Ok(Continue) + } + Instruction::RecordInsert { src_dst, key, val } => { + let record_value = ctx.collect_reg(*src_dst, *span)?; + let key = ctx.collect_reg(*key, *span)?; + let val = ctx.collect_reg(*val, *span)?; + let record_span = record_value.span(); + let mut record = record_value.into_record()?; + + let key = key.coerce_into_string()?; + if let Some(old_value) = record.insert(&key, val) { + return Err(ShellError::ColumnDefinedTwice { + col_name: key, + second_use: *span, + first_use: old_value.span(), + }); + } + + ctx.put_reg( + *src_dst, + Value::record(record, record_span).into_pipeline_data(), + ); + Ok(Continue) + } + Instruction::RecordSpread { src_dst, items } => { + let record_value = ctx.collect_reg(*src_dst, *span)?; + let items = ctx.collect_reg(*items, *span)?; + let record_span = record_value.span(); + let items_span = items.span(); + let mut record = record_value.into_record()?; + // Not using .extend() here because it doesn't handle 
duplicates + for (key, val) in items + .into_record() + .map_err(|_| ShellError::CannotSpreadAsRecord { span: items_span })? + { + if let Some(first_value) = record.insert(&key, val) { + return Err(ShellError::ColumnDefinedTwice { + col_name: key, + second_use: *span, + first_use: first_value.span(), + }); + } + } + ctx.put_reg( + *src_dst, + Value::record(record, record_span).into_pipeline_data(), + ); + Ok(Continue) + } + Instruction::Not { src_dst } => { + let bool = ctx.collect_reg(*src_dst, *span)?; + let negated = !bool.as_bool()?; + ctx.put_reg( + *src_dst, + Value::bool(negated, bool.span()).into_pipeline_data(), + ); + Ok(Continue) + } + Instruction::BinaryOp { lhs_dst, op, rhs } => binary_op(ctx, *lhs_dst, op, *rhs, *span), + Instruction::FollowCellPath { src_dst, path } => { + let data = ctx.take_reg(*src_dst); + let path = ctx.take_reg(*path); + if let PipelineData::Value(Value::CellPath { val: path, .. }, _) = path { + let value = data.follow_cell_path(&path.members, *span, true)?; + ctx.put_reg(*src_dst, value.into_pipeline_data()); + Ok(Continue) + } else if let PipelineData::Value(Value::Error { error, .. }, _) = path { + Err(*error) + } else { + Err(ShellError::TypeMismatch { + err_message: "expected cell path".into(), + span: path.span().unwrap_or(*span), + }) + } + } + Instruction::CloneCellPath { dst, src, path } => { + let value = ctx.clone_reg_value(*src, *span)?; + let path = ctx.take_reg(*path); + if let PipelineData::Value(Value::CellPath { val: path, .. }, _) = path { + // TODO: make follow_cell_path() not have to take ownership, probably using Cow + let value = value.follow_cell_path(&path.members, true)?; + ctx.put_reg(*dst, value.into_pipeline_data()); + Ok(Continue) + } else if let PipelineData::Value(Value::Error { error, .. 
}, _) = path { + Err(*error) + } else { + Err(ShellError::TypeMismatch { + err_message: "expected cell path".into(), + span: path.span().unwrap_or(*span), + }) + } + } + Instruction::UpsertCellPath { + src_dst, + path, + new_value, + } => { + let data = ctx.take_reg(*src_dst); + let metadata = data.metadata(); + // Change the span because we're modifying it + let mut value = data.into_value(*span)?; + let path = ctx.take_reg(*path); + let new_value = ctx.collect_reg(*new_value, *span)?; + if let PipelineData::Value(Value::CellPath { val: path, .. }, _) = path { + value.upsert_data_at_cell_path(&path.members, new_value)?; + ctx.put_reg(*src_dst, value.into_pipeline_data_with_metadata(metadata)); + Ok(Continue) + } else if let PipelineData::Value(Value::Error { error, .. }, _) = path { + Err(*error) + } else { + Err(ShellError::TypeMismatch { + err_message: "expected cell path".into(), + span: path.span().unwrap_or(*span), + }) + } + } + Instruction::Jump { index } => Ok(Branch(*index)), + Instruction::BranchIf { cond, index } => { + let data = ctx.take_reg(*cond); + let data_span = data.span(); + let val = match data { + PipelineData::Value(Value::Bool { val, .. }, _) => val, + PipelineData::Value(Value::Error { error, .. }, _) => { + return Err(*error); + } + _ => { + return Err(ShellError::TypeMismatch { + err_message: "expected bool".into(), + span: data_span.unwrap_or(*span), + }); + } + }; + if val { + Ok(Branch(*index)) + } else { + Ok(Continue) + } + } + Instruction::BranchIfEmpty { src, index } => { + let is_empty = matches!( + ctx.borrow_reg(*src), + PipelineData::Empty | PipelineData::Value(Value::Nothing { .. 
}, _) + ); + + if is_empty { + Ok(Branch(*index)) + } else { + Ok(Continue) + } + } + Instruction::Match { + pattern, + src, + index, + } => { + let value = ctx.clone_reg_value(*src, *span)?; + ctx.matches.clear(); + if pattern.match_value(&value, &mut ctx.matches) { + // Match succeeded: set variables and branch + for (var_id, match_value) in ctx.matches.drain(..) { + ctx.stack.add_var(var_id, match_value); + } + Ok(Branch(*index)) + } else { + // Failed to match, put back original value + ctx.matches.clear(); + Ok(Continue) + } + } + Instruction::CheckMatchGuard { src } => { + if matches!( + ctx.borrow_reg(*src), + PipelineData::Value(Value::Bool { .. }, _) + ) { + Ok(Continue) + } else { + Err(ShellError::MatchGuardNotBool { span: *span }) + } + } + Instruction::Iterate { + dst, + stream, + end_index, + } => eval_iterate(ctx, *dst, *stream, *end_index), + Instruction::OnError { index } => { + ctx.stack.error_handlers.push(ErrorHandler { + handler_index: *index, + error_register: None, + }); + Ok(Continue) + } + Instruction::OnErrorInto { index, dst } => { + ctx.stack.error_handlers.push(ErrorHandler { + handler_index: *index, + error_register: Some(*dst), + }); + Ok(Continue) + } + Instruction::PopErrorHandler => { + ctx.stack.error_handlers.pop(ctx.error_handler_base); + Ok(Continue) + } + Instruction::CheckExternalFailed { dst, src } => { + let data = ctx.take_reg(*src); + let (data, failed) = data.check_external_failed()?; + ctx.put_reg(*src, data); + ctx.put_reg(*dst, Value::bool(failed, *span).into_pipeline_data()); + Ok(Continue) + } + Instruction::ReturnEarly { src } => { + let val = ctx.collect_reg(*src, *span)?; + Err(ShellError::Return { + span: *span, + value: Box::new(val), + }) + } + Instruction::Return { src } => Ok(Return(*src)), + } +} + +/// Load a literal value into a register +fn load_literal( + ctx: &mut EvalContext<'_>, + dst: RegId, + lit: &Literal, + span: Span, +) -> Result { + let value = literal_value(ctx, lit, span)?; + 
ctx.put_reg(dst, PipelineData::Value(value, None)); + Ok(InstructionResult::Continue) +} + +fn literal_value( + ctx: &mut EvalContext<'_>, + lit: &Literal, + span: Span, +) -> Result { + Ok(match lit { + Literal::Bool(b) => Value::bool(*b, span), + Literal::Int(i) => Value::int(*i, span), + Literal::Float(f) => Value::float(*f, span), + Literal::Filesize(q) => Value::filesize(*q, span), + Literal::Duration(q) => Value::duration(*q, span), + Literal::Binary(bin) => Value::binary(&ctx.data[*bin], span), + Literal::Block(block_id) | Literal::RowCondition(block_id) | Literal::Closure(block_id) => { + let block = ctx.engine_state.get_block(*block_id); + let captures = block + .captures + .iter() + .map(|var_id| get_var(ctx, *var_id, span).map(|val| (*var_id, val))) + .collect::, ShellError>>()?; + Value::closure( + Closure { + block_id: *block_id, + captures, + }, + span, + ) + } + Literal::Range { + start, + step, + end, + inclusion, + } => { + let start = ctx.collect_reg(*start, span)?; + let step = ctx.collect_reg(*step, span)?; + let end = ctx.collect_reg(*end, span)?; + let range = Range::new(start, step, end, *inclusion, span)?; + Value::range(range, span) + } + Literal::List { capacity } => Value::list(Vec::with_capacity(*capacity), span), + Literal::Record { capacity } => Value::record(Record::with_capacity(*capacity), span), + Literal::Filepath { + val: path, + no_expand, + } => { + let path = ctx.get_str(*path, span)?; + if *no_expand { + Value::string(path, span) + } else { + let cwd = ctx.engine_state.cwd(Some(ctx.stack))?; + let path = expand_path_with(path, cwd, true); + + Value::string(path.to_string_lossy(), span) + } + } + Literal::Directory { + val: path, + no_expand, + } => { + let path = ctx.get_str(*path, span)?; + if path == "-" { + Value::string("-", span) + } else if *no_expand { + Value::string(path, span) + } else { + let cwd = ctx + .engine_state + .cwd(Some(ctx.stack)) + .map(AbsolutePathBuf::into_std_path_buf) + .unwrap_or_default(); + let 
path = expand_path_with(path, cwd, true); + + Value::string(path.to_string_lossy(), span) + } + } + Literal::GlobPattern { val, no_expand } => { + Value::glob(ctx.get_str(*val, span)?, *no_expand, span) + } + Literal::String(s) => Value::string(ctx.get_str(*s, span)?, span), + Literal::RawString(s) => Value::string(ctx.get_str(*s, span)?, span), + Literal::CellPath(path) => Value::cell_path(CellPath::clone(path), span), + Literal::Date(dt) => Value::date(**dt, span), + Literal::Nothing => Value::nothing(span), + }) +} + +fn binary_op( + ctx: &mut EvalContext<'_>, + lhs_dst: RegId, + op: &Operator, + rhs: RegId, + span: Span, +) -> Result { + let lhs_val = ctx.collect_reg(lhs_dst, span)?; + let rhs_val = ctx.collect_reg(rhs, span)?; + + // Handle binary op errors early + if let Value::Error { error, .. } = lhs_val { + return Err(*error); + } + if let Value::Error { error, .. } = rhs_val { + return Err(*error); + } + + // We only have access to one span here, but the generated code usually adds a `span` + // instruction to set the output span to the right span. + let op_span = span; + + let result = match op { + Operator::Comparison(cmp) => match cmp { + Comparison::Equal => lhs_val.eq(op_span, &rhs_val, span)?, + Comparison::NotEqual => lhs_val.ne(op_span, &rhs_val, span)?, + Comparison::LessThan => lhs_val.lt(op_span, &rhs_val, span)?, + Comparison::GreaterThan => lhs_val.gt(op_span, &rhs_val, span)?, + Comparison::LessThanOrEqual => lhs_val.lte(op_span, &rhs_val, span)?, + Comparison::GreaterThanOrEqual => lhs_val.gte(op_span, &rhs_val, span)?, + Comparison::RegexMatch => { + lhs_val.regex_match(ctx.engine_state, op_span, &rhs_val, false, span)? + } + Comparison::NotRegexMatch => { + lhs_val.regex_match(ctx.engine_state, op_span, &rhs_val, true, span)? 
+ } + Comparison::In => lhs_val.r#in(op_span, &rhs_val, span)?, + Comparison::NotIn => lhs_val.not_in(op_span, &rhs_val, span)?, + Comparison::StartsWith => lhs_val.starts_with(op_span, &rhs_val, span)?, + Comparison::EndsWith => lhs_val.ends_with(op_span, &rhs_val, span)?, + }, + Operator::Math(mat) => match mat { + Math::Plus => lhs_val.add(op_span, &rhs_val, span)?, + Math::Append => lhs_val.append(op_span, &rhs_val, span)?, + Math::Minus => lhs_val.sub(op_span, &rhs_val, span)?, + Math::Multiply => lhs_val.mul(op_span, &rhs_val, span)?, + Math::Divide => lhs_val.div(op_span, &rhs_val, span)?, + Math::Modulo => lhs_val.modulo(op_span, &rhs_val, span)?, + Math::FloorDivision => lhs_val.floor_div(op_span, &rhs_val, span)?, + Math::Pow => lhs_val.pow(op_span, &rhs_val, span)?, + }, + Operator::Boolean(bl) => match bl { + Boolean::And => lhs_val.and(op_span, &rhs_val, span)?, + Boolean::Or => lhs_val.or(op_span, &rhs_val, span)?, + Boolean::Xor => lhs_val.xor(op_span, &rhs_val, span)?, + }, + Operator::Bits(bit) => match bit { + Bits::BitOr => lhs_val.bit_or(op_span, &rhs_val, span)?, + Bits::BitXor => lhs_val.bit_xor(op_span, &rhs_val, span)?, + Bits::BitAnd => lhs_val.bit_and(op_span, &rhs_val, span)?, + Bits::ShiftLeft => lhs_val.bit_shl(op_span, &rhs_val, span)?, + Bits::ShiftRight => lhs_val.bit_shr(op_span, &rhs_val, span)?, + }, + Operator::Assignment(_asg) => { + return Err(ShellError::IrEvalError { + msg: "can't eval assignment with the `binary-op` instruction".into(), + span: Some(span), + }) + } + }; + + ctx.put_reg(lhs_dst, PipelineData::Value(result, None)); + + Ok(InstructionResult::Continue) +} + +/// Evaluate a call +fn eval_call( + ctx: &mut EvalContext<'_>, + decl_id: DeclId, + head: Span, + input: PipelineData, +) -> Result { + let EvalContext { + engine_state, + stack: caller_stack, + args_base, + redirect_out, + redirect_err, + .. 
+ } = ctx; + + let args_len = caller_stack.arguments.get_len(*args_base); + let decl = engine_state.get_decl(decl_id); + + // Set up redirect modes + let mut caller_stack = caller_stack.push_redirection(redirect_out.take(), redirect_err.take()); + + let result; + + if let Some(block_id) = decl.block_id() { + // If the decl is a custom command + let block = engine_state.get_block(block_id); + + // Set up a callee stack with the captures and move arguments from the stack into variables + let mut callee_stack = caller_stack.gather_captures(engine_state, &block.captures); + + gather_arguments( + engine_state, + block, + &mut caller_stack, + &mut callee_stack, + *args_base, + args_len, + head, + )?; + + // Add one to the recursion count, so we don't recurse too deep. Stack overflows are not + // recoverable in Rust. + callee_stack.recursion_count += 1; + + result = eval_block_with_early_return::(engine_state, &mut callee_stack, block, input); + + // Move environment variables back into the caller stack scope if requested to do so + if block.redirect_env { + redirect_env(engine_state, &mut caller_stack, &callee_stack); + } + } else { + // FIXME: precalculate this and save it somewhere + let span = Span::merge_many( + std::iter::once(head).chain( + caller_stack + .arguments + .get_args(*args_base, args_len) + .iter() + .flat_map(|arg| arg.span()), + ), + ); + + let call = Call { + decl_id, + head, + span, + args_base: *args_base, + args_len, + }; + + // Run the call + result = decl.run(engine_state, &mut caller_stack, &(&call).into(), input); + }; + + drop(caller_stack); + + // Important that this runs, to reset state post-call: + ctx.stack.arguments.leave_frame(ctx.args_base); + ctx.redirect_out = None; + ctx.redirect_err = None; + + result +} + +fn find_named_var_id( + sig: &Signature, + name: &[u8], + short: &[u8], + span: Span, +) -> Result { + sig.named + .iter() + .find(|n| { + if !n.long.is_empty() { + n.long.as_bytes() == name + } else { + // It's possible to only 
have a short name and no long name + n.short + .is_some_and(|s| s.encode_utf8(&mut [0; 4]).as_bytes() == short) + } + }) + .ok_or_else(|| ShellError::IrEvalError { + msg: format!( + "block does not have an argument named `{}`", + String::from_utf8_lossy(name) + ), + span: Some(span), + }) + .and_then(|flag| expect_named_var_id(flag, span)) +} + +fn expect_named_var_id(arg: &Flag, span: Span) -> Result { + arg.var_id.ok_or_else(|| ShellError::IrEvalError { + msg: format!( + "block signature is missing var id for named arg `{}`", + arg.long + ), + span: Some(span), + }) +} + +fn expect_positional_var_id(arg: &PositionalArg, span: Span) -> Result { + arg.var_id.ok_or_else(|| ShellError::IrEvalError { + msg: format!( + "block signature is missing var id for positional arg `{}`", + arg.name + ), + span: Some(span), + }) +} + +/// Move arguments from the stack into variables for a custom command +fn gather_arguments( + engine_state: &EngineState, + block: &Block, + caller_stack: &mut Stack, + callee_stack: &mut Stack, + args_base: usize, + args_len: usize, + call_head: Span, +) -> Result<(), ShellError> { + let mut positional_iter = block + .signature + .required_positional + .iter() + .map(|p| (p, true)) + .chain( + block + .signature + .optional_positional + .iter() + .map(|p| (p, false)), + ); + + // Arguments that didn't get consumed by required/optional + let mut rest = vec![]; + + // If we encounter a spread, all further positionals should go to rest + let mut always_spread = false; + + for arg in caller_stack.arguments.drain_args(args_base, args_len) { + match arg { + Argument::Positional { span, val, .. 
} => { + // Don't check next positional arg if we encountered a spread previously + let next = (!always_spread).then(|| positional_iter.next()).flatten(); + if let Some((positional_arg, required)) = next { + let var_id = expect_positional_var_id(positional_arg, span)?; + if required { + // By checking the type of the bound variable rather than converting the + // SyntaxShape here, we might be able to save some allocations and effort + let variable = engine_state.get_var(var_id); + check_type(&val, &variable.ty)?; + } + callee_stack.add_var(var_id, val); + } else { + rest.push(val); + } + } + Argument::Spread { vals, .. } => { + if let Value::List { vals, .. } = vals { + rest.extend(vals); + // All further positional args should go to spread + always_spread = true; + } else if let Value::Error { error, .. } = vals { + return Err(*error); + } else { + return Err(ShellError::CannotSpreadAsList { span: vals.span() }); + } + } + Argument::Flag { + data, + name, + short, + span, + } => { + let var_id = find_named_var_id(&block.signature, &data[name], &data[short], span)?; + callee_stack.add_var(var_id, Value::bool(true, span)) + } + Argument::Named { + data, + name, + short, + span, + val, + .. + } => { + let var_id = find_named_var_id(&block.signature, &data[name], &data[short], span)?; + callee_stack.add_var(var_id, val) + } + Argument::ParserInfo { .. 
} => (), + } + } + + // Add the collected rest of the arguments if a spread argument exists + if let Some(rest_arg) = &block.signature.rest_positional { + let rest_span = rest.first().map(|v| v.span()).unwrap_or(call_head); + let var_id = expect_positional_var_id(rest_arg, rest_span)?; + callee_stack.add_var(var_id, Value::list(rest, rest_span)); + } + + // Check for arguments that haven't yet been set and set them to their defaults + for (positional_arg, _) in positional_iter { + let var_id = expect_positional_var_id(positional_arg, call_head)?; + callee_stack.add_var( + var_id, + positional_arg + .default_value + .clone() + .unwrap_or(Value::nothing(call_head)), + ); + } + + for named_arg in &block.signature.named { + if let Some(var_id) = named_arg.var_id { + // For named arguments, we do this check by looking to see if the variable was set yet on + // the stack. This assumes that the stack's variables was previously empty, but that's a + // fair assumption for a brand new callee stack. + if !callee_stack.vars.iter().any(|(id, _)| *id == var_id) { + let val = if named_arg.arg.is_none() { + Value::bool(false, call_head) + } else if let Some(value) = &named_arg.default_value { + value.clone() + } else { + Value::nothing(call_head) + }; + callee_stack.add_var(var_id, val); + } + } + } + + Ok(()) +} + +/// Type check helper. Produces `CantConvert` error if `val` is not compatible with `ty`. +fn check_type(val: &Value, ty: &Type) -> Result<(), ShellError> { + if match val { + // An empty list is compatible with any list or table type + Value::List { vals, .. 
} if vals.is_empty() => { + matches!(ty, Type::Any | Type::List(_) | Type::Table(_)) + } + // FIXME: the allocation that might be required here is not great, it would be nice to be + // able to just directly check whether a value is compatible with a type + _ => val.get_type().is_subtype(ty), + } { + Ok(()) + } else { + Err(ShellError::CantConvert { + to_type: ty.to_string(), + from_type: val.get_type().to_string(), + span: val.span(), + help: None, + }) + } +} + +/// Get variable from [`Stack`] or [`EngineState`] +fn get_var(ctx: &EvalContext<'_>, var_id: VarId, span: Span) -> Result { + match var_id { + // $env + ENV_VARIABLE_ID => { + let env_vars = ctx.stack.get_env_vars(ctx.engine_state); + let env_columns = env_vars.keys(); + let env_values = env_vars.values(); + + let mut pairs = env_columns + .map(|x| x.to_string()) + .zip(env_values.cloned()) + .collect::>(); + + pairs.sort_by(|a, b| a.0.cmp(&b.0)); + + Ok(Value::record(pairs.into_iter().collect(), span)) + } + _ => ctx.stack.get_var(var_id, span).or_else(|err| { + // $nu is handled by getting constant + if let Some(const_val) = ctx.engine_state.get_constant(var_id).cloned() { + Ok(const_val.with_span(span)) + } else { + Err(err) + } + }), + } +} + +/// Get an environment variable, case-insensitively +fn get_env_var_case_insensitive<'a>(ctx: &'a mut EvalContext<'_>, key: &str) -> Option<&'a Value> { + // Read scopes in order + ctx.stack + .env_vars + .iter() + .rev() + .chain(std::iter::once(ctx.engine_state.env_vars.as_ref())) + .flat_map(|overlays| { + // Read overlays in order + ctx.stack + .active_overlays + .iter() + .rev() + .filter_map(|name| overlays.get(name)) + }) + .find_map(|map| { + // Use the hashmap first to try to be faster? + map.get(key).or_else(|| { + // Check to see if it exists at all in the map + map.iter() + .find_map(|(k, v)| k.eq_ignore_case(key).then_some(v)) + }) + }) +} + +/// Get the existing name of an environment variable, case-insensitively. 
This is used to implement +/// case preservation of environment variables, so that changing an environment variable that +/// already exists always uses the same case. +fn get_env_var_name_case_insensitive<'a>(ctx: &mut EvalContext<'_>, key: &'a str) -> Cow<'a, str> { + // Read scopes in order + ctx.stack + .env_vars + .iter() + .rev() + .chain(std::iter::once(ctx.engine_state.env_vars.as_ref())) + .flat_map(|overlays| { + // Read overlays in order + ctx.stack + .active_overlays + .iter() + .rev() + .filter_map(|name| overlays.get(name)) + }) + .find_map(|map| { + // Use the hashmap first to try to be faster? + if map.contains_key(key) { + Some(Cow::Borrowed(key)) + } else { + map.keys().find(|k| k.eq_ignore_case(key)).map(|k| { + // it exists, but with a different case + Cow::Owned(k.to_owned()) + }) + } + }) + // didn't exist. + .unwrap_or(Cow::Borrowed(key)) +} + +/// Helper to collect values into [`PipelineData`], preserving original span and metadata +fn collect(data: PipelineData, fallback_span: Span) -> Result { + let span = data.span().unwrap_or(fallback_span); + let metadata = data.metadata(); + let value = data.into_value(span)?; + Ok(PipelineData::Value(value, metadata)) +} + +/// Helper for drain behavior. Returns `Ok(ExitCode)` on failed external. +fn drain(ctx: &mut EvalContext<'_>, data: PipelineData) -> Result { + use self::InstructionResult::*; + let span = data.span().unwrap_or(Span::unknown()); + if let Some(exit_status) = data.drain()? 
{ + ctx.stack.add_env_var( + "LAST_EXIT_CODE".into(), + Value::int(exit_status.code() as i64, span), + ); + if exit_status.code() == 0 { + Ok(Continue) + } else { + Ok(ExitCode(exit_status.code())) + } + } else { + Ok(Continue) + } +} + +enum RedirectionStream { + Out, + Err, +} + +/// Open a file for redirection +fn open_file(ctx: &EvalContext<'_>, path: &Value, append: bool) -> Result, ShellError> { + let path_expanded = + expand_path_with(path.as_str()?, ctx.engine_state.cwd(Some(ctx.stack))?, true); + let mut options = File::options(); + if append { + options.append(true); + } else { + options.write(true).truncate(true); + } + let file = options + .create(true) + .open(path_expanded) + .err_span(path.span())?; + Ok(Arc::new(file)) +} + +/// Set up a [`Redirection`] from a [`RedirectMode`] +fn eval_redirection( + ctx: &mut EvalContext<'_>, + mode: &RedirectMode, + span: Span, + which: RedirectionStream, +) -> Result, ShellError> { + match mode { + RedirectMode::Pipe => Ok(Some(Redirection::Pipe(OutDest::Pipe))), + RedirectMode::Capture => Ok(Some(Redirection::Pipe(OutDest::Capture))), + RedirectMode::Null => Ok(Some(Redirection::Pipe(OutDest::Null))), + RedirectMode::Inherit => Ok(Some(Redirection::Pipe(OutDest::Inherit))), + RedirectMode::File { file_num } => { + let file = ctx + .files + .get(*file_num as usize) + .cloned() + .flatten() + .ok_or_else(|| ShellError::IrEvalError { + msg: format!("Tried to redirect to file #{file_num}, but it is not open"), + span: Some(span), + })?; + Ok(Some(Redirection::File(file))) + } + RedirectMode::Caller => Ok(match which { + RedirectionStream::Out => ctx.stack.pipe_stdout().cloned().map(Redirection::Pipe), + RedirectionStream::Err => ctx.stack.pipe_stderr().cloned().map(Redirection::Pipe), + }), + } +} + +/// Do an `iterate` instruction. 
This can be called repeatedly to get more values from an iterable +fn eval_iterate( + ctx: &mut EvalContext<'_>, + dst: RegId, + stream: RegId, + end_index: usize, +) -> Result { + let mut data = ctx.take_reg(stream); + if let PipelineData::ListStream(list_stream, _) = &mut data { + // Modify the stream, taking one value off, and branching if it's empty + if let Some(val) = list_stream.next_value() { + ctx.put_reg(dst, val.into_pipeline_data()); + ctx.put_reg(stream, data); // put the stream back so it can be iterated on again + Ok(InstructionResult::Continue) + } else { + ctx.put_reg(dst, PipelineData::Empty); + Ok(InstructionResult::Branch(end_index)) + } + } else { + // Convert the PipelineData to an iterator, and wrap it in a ListStream so it can be + // iterated on + let metadata = data.metadata(); + let span = data.span().unwrap_or(Span::unknown()); + ctx.put_reg( + stream, + PipelineData::ListStream( + ListStream::new(data.into_iter(), span, Signals::EMPTY), + metadata, + ), + ); + eval_iterate(ctx, dst, stream, end_index) + } +} + +/// Redirect environment from the callee stack to the caller stack +fn redirect_env(engine_state: &EngineState, caller_stack: &mut Stack, callee_stack: &Stack) { + // TODO: make this more efficient + // Grab all environment variables from the callee + let caller_env_vars = caller_stack.get_env_var_names(engine_state); + + // remove env vars that are present in the caller but not in the callee + // (the callee hid them) + for var in caller_env_vars.iter() { + if !callee_stack.has_env_var(engine_state, var) { + caller_stack.remove_env_var(engine_state, var); + } + } + + // add new env vars from callee to caller + for (var, value) in callee_stack.get_stack_env_vars() { + caller_stack.add_env_var(var, value); + } +} diff --git a/crates/nu-engine/src/lib.rs b/crates/nu-engine/src/lib.rs index e3c8f8eede..7ed246e975 100644 --- a/crates/nu-engine/src/lib.rs +++ b/crates/nu-engine/src/lib.rs @@ -2,16 +2,19 @@ mod call_ext; mod 
closure_eval; pub mod column; pub mod command_prelude; +mod compile; pub mod documentation; pub mod env; mod eval; mod eval_helpers; +mod eval_ir; mod glob_from; pub mod scope; pub use call_ext::CallExt; pub use closure_eval::*; pub use column::get_columns; +pub use compile::compile; pub use documentation::get_full_help; pub use env::*; pub use eval::{ @@ -19,4 +22,5 @@ pub use eval::{ eval_expression_with_input, eval_subexpression, eval_variable, redirect_env, }; pub use eval_helpers::*; +pub use eval_ir::eval_ir_block; pub use glob_from::glob_from; diff --git a/crates/nu-parser/src/known_external.rs b/crates/nu-parser/src/known_external.rs index 453112aa32..a41cf3a4e8 100644 --- a/crates/nu-parser/src/known_external.rs +++ b/crates/nu-parser/src/known_external.rs @@ -1,7 +1,8 @@ use nu_engine::command_prelude::*; use nu_protocol::{ - ast::{Argument, Expr, Expression}, - engine::{CommandType, UNKNOWN_SPAN_ID}, + ast::{self, Expr, Expression}, + engine::{self, CallImpl, CommandType, UNKNOWN_SPAN_ID}, + ir::{self, DataSlice}, }; #[derive(Clone)] @@ -43,8 +44,6 @@ impl Command for KnownExternal { let command = engine_state.get_decl(decl_id); - let mut extern_call = Call::new(head_span); - let extern_name = if let Some(name_bytes) = engine_state.find_decl_name(call.decl_id, &[]) { String::from_utf8_lossy(name_bytes) } else { @@ -56,59 +55,166 @@ impl Command for KnownExternal { }; let extern_name: Vec<_> = extern_name.split(' ').collect(); - let call_head_id = engine_state - .find_span_id(call.head) - .unwrap_or(UNKNOWN_SPAN_ID); - let arg_extern_name = Expression::new_existing( - Expr::String(extern_name[0].to_string()), + match &call.inner { + CallImpl::AstRef(call) => { + let extern_call = ast_call_to_extern_call(engine_state, call, &extern_name)?; + command.run(engine_state, stack, &(&extern_call).into(), input) + } + CallImpl::AstBox(call) => { + let extern_call = ast_call_to_extern_call(engine_state, call, &extern_name)?; + command.run(engine_state, stack, 
&(&extern_call).into(), input) + } + CallImpl::IrRef(call) => { + let extern_call = ir_call_to_extern_call(stack, call, &extern_name)?; + command.run(engine_state, stack, &(&extern_call).into(), input) + } + CallImpl::IrBox(call) => { + let extern_call = ir_call_to_extern_call(stack, call, &extern_name)?; + command.run(engine_state, stack, &(&extern_call).into(), input) + } + } + } +} + +/// Transform the args from an `ast::Call` onto a `run-external` call +fn ast_call_to_extern_call( + engine_state: &EngineState, + call: &ast::Call, + extern_name: &[&str], +) -> Result { + let head_span = call.head; + + let mut extern_call = ast::Call::new(head_span); + + let call_head_id = engine_state + .find_span_id(call.head) + .unwrap_or(UNKNOWN_SPAN_ID); + + let arg_extern_name = Expression::new_existing( + Expr::String(extern_name[0].to_string()), + call.head, + call_head_id, + Type::String, + ); + + extern_call.add_positional(arg_extern_name); + + for subcommand in extern_name.iter().skip(1) { + extern_call.add_positional(Expression::new_existing( + Expr::String(subcommand.to_string()), call.head, call_head_id, Type::String, - ); + )); + } - extern_call.add_positional(arg_extern_name); - - for subcommand in extern_name.into_iter().skip(1) { - extern_call.add_positional(Expression::new_existing( - Expr::String(subcommand.to_string()), - call.head, - call_head_id, - Type::String, - )); - } - - for arg in &call.arguments { - match arg { - Argument::Positional(positional) => extern_call.add_positional(positional.clone()), - Argument::Named(named) => { - let named_span_id = engine_state - .find_span_id(named.0.span) - .unwrap_or(UNKNOWN_SPAN_ID); - if let Some(short) = &named.1 { - extern_call.add_positional(Expression::new_existing( - Expr::String(format!("-{}", short.item)), - named.0.span, - named_span_id, - Type::String, - )); - } else { - extern_call.add_positional(Expression::new_existing( - Expr::String(format!("--{}", named.0.item)), - named.0.span, - named_span_id, - 
Type::String, - )); - } - if let Some(arg) = &named.2 { - extern_call.add_positional(arg.clone()); - } + for arg in &call.arguments { + match arg { + ast::Argument::Positional(positional) => extern_call.add_positional(positional.clone()), + ast::Argument::Named(named) => { + let named_span_id = engine_state + .find_span_id(named.0.span) + .unwrap_or(UNKNOWN_SPAN_ID); + if let Some(short) = &named.1 { + extern_call.add_positional(Expression::new_existing( + Expr::String(format!("-{}", short.item)), + named.0.span, + named_span_id, + Type::String, + )); + } else { + extern_call.add_positional(Expression::new_existing( + Expr::String(format!("--{}", named.0.item)), + named.0.span, + named_span_id, + Type::String, + )); } - Argument::Unknown(unknown) => extern_call.add_unknown(unknown.clone()), - Argument::Spread(args) => extern_call.add_spread(args.clone()), + if let Some(arg) = &named.2 { + extern_call.add_positional(arg.clone()); + } + } + ast::Argument::Unknown(unknown) => extern_call.add_unknown(unknown.clone()), + ast::Argument::Spread(args) => extern_call.add_spread(args.clone()), + } + } + + Ok(extern_call) +} + +/// Transform the args from an `ir::Call` onto a `run-external` call +fn ir_call_to_extern_call( + stack: &mut Stack, + call: &ir::Call, + extern_name: &[&str], +) -> Result { + let mut extern_call = ir::Call::build(call.decl_id, call.head); + + // Add the command and subcommands + for name in extern_name { + extern_call.add_positional(stack, call.head, Value::string(*name, call.head)); + } + + // Add the arguments, reformatting named arguments into string positionals + for index in 0..call.args_len { + match &call.arguments(stack)[index] { + engine::Argument::Flag { + data, + name, + short, + span, + } => { + let name_arg = engine::Argument::Positional { + span: *span, + val: Value::string(known_external_option_name(data, *name, *short), *span), + ast: None, + }; + extern_call.add_argument(stack, name_arg); + } + engine::Argument::Named { + data, + 
name, + short, + span, + val, + .. + } => { + let name_arg = engine::Argument::Positional { + span: *span, + val: Value::string(known_external_option_name(data, *name, *short), *span), + ast: None, + }; + let val_arg = engine::Argument::Positional { + span: *span, + val: val.clone(), + ast: None, + }; + extern_call.add_argument(stack, name_arg); + extern_call.add_argument(stack, val_arg); + } + a @ (engine::Argument::Positional { .. } + | engine::Argument::Spread { .. } + | engine::Argument::ParserInfo { .. }) => { + let argument = a.clone(); + extern_call.add_argument(stack, argument); } } + } - command.run(engine_state, stack, &extern_call, input) + Ok(extern_call.finish()) +} + +fn known_external_option_name(data: &[u8], name: DataSlice, short: DataSlice) -> String { + if !data[name].is_empty() { + format!( + "--{}", + std::str::from_utf8(&data[name]).expect("invalid utf-8 in flag name") + ) + } else { + format!( + "-{}", + std::str::from_utf8(&data[short]).expect("invalid utf-8 in flag short name") + ) } } diff --git a/crates/nu-parser/src/parse_patterns.rs b/crates/nu-parser/src/parse_patterns.rs index 73668b7d04..dc4a64ce37 100644 --- a/crates/nu-parser/src/parse_patterns.rs +++ b/crates/nu-parser/src/parse_patterns.rs @@ -39,7 +39,7 @@ pub fn parse_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPatt let value = parse_value(working_set, span, &SyntaxShape::Any); MatchPattern { - pattern: Pattern::Value(value), + pattern: Pattern::Value(Box::new(value)), guard: None, span, } diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 51935214f5..fc2131aad7 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -3302,6 +3302,8 @@ pub fn parse_row_condition(working_set: &mut StateWorkingSet, spans: &[Span]) -> default_value: None, }); + compile_block(working_set, &mut block); + working_set.add_block(Arc::new(block)) } }; @@ -4445,7 +4447,7 @@ pub fn parse_match_block_expression(working_set: &mut 
StateWorkingSet, span: Spa &SyntaxShape::MathExpression, ); - pattern.guard = Some(guard); + pattern.guard = Some(Box::new(guard)); position += if found { start + 1 } else { start }; connector = working_set.get_span_contents(output[position].span); } @@ -5298,6 +5300,8 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex let ty = output.ty.clone(); block.pipelines = vec![Pipeline::from_vec(vec![output])]; + compile_block(working_set, &mut block); + let block_id = working_set.add_block(Arc::new(block)); let mut env_vars = vec![]; @@ -5853,9 +5857,25 @@ pub fn parse_block( working_set.parse_errors.extend_from_slice(&errors); } + // Do not try to compile blocks that are subexpressions, or when we've already had a parse + // failure as that definitely will fail to compile + if !is_subexpression && working_set.parse_errors.is_empty() { + compile_block(working_set, &mut block); + } + block } +/// Compile an IR block for the `Block`, adding a compile error on failure +fn compile_block(working_set: &mut StateWorkingSet<'_>, block: &mut Block) { + match nu_engine::compile(working_set, block) { + Ok(ir_block) => { + block.ir_block = Some(ir_block); + } + Err(err) => working_set.compile_errors.push(err), + } +} + pub fn discover_captures_in_closure( working_set: &StateWorkingSet, block: &Block, @@ -6298,12 +6318,14 @@ fn wrap_expr_with_collect(working_set: &mut StateWorkingSet, expr: &Expression) default_value: None, }); - let block = Block { + let mut block = Block { pipelines: vec![Pipeline::from_vec(vec![expr.clone()])], signature: Box::new(signature), ..Default::default() }; + compile_block(working_set, &mut block); + let block_id = working_set.add_block(Arc::new(block)); output.push(Argument::Positional(Expression::new( diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index 0784fe69d4..7762863ba1 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -1,7 
+1,7 @@ use nu_parser::*; use nu_protocol::{ - ast::{Argument, Call, Expr, Expression, ExternalArgument, PathMember, Range}, - engine::{Command, EngineState, Stack, StateWorkingSet}, + ast::{Argument, Expr, Expression, ExternalArgument, PathMember, Range}, + engine::{Call, Command, EngineState, Stack, StateWorkingSet}, ParseError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, }; use rstest::rstest; @@ -1759,10 +1759,7 @@ mod range { #[cfg(test)] mod input_types { use super::*; - use nu_protocol::{ - ast::{Argument, Call}, - Category, PipelineData, ShellError, Type, - }; + use nu_protocol::{ast::Argument, engine::Call, Category, PipelineData, ShellError, Type}; #[derive(Clone)] pub struct LsTest; diff --git a/crates/nu-plugin-engine/src/context.rs b/crates/nu-plugin-engine/src/context.rs index b026d21b23..0df533a11e 100644 --- a/crates/nu-plugin-engine/src/context.rs +++ b/crates/nu-plugin-engine/src/context.rs @@ -1,8 +1,7 @@ use crate::util::MutableCow; use nu_engine::{get_eval_block_with_early_return, get_full_help, ClosureEvalOnce}; use nu_protocol::{ - ast::Call, - engine::{Closure, EngineState, Redirection, Stack}, + engine::{Call, Closure, EngineState, Redirection, Stack}, Config, IntoSpanned, OutDest, PipelineData, PluginIdentity, ShellError, Signals, Span, Spanned, Value, }; @@ -54,7 +53,7 @@ pub struct PluginExecutionCommandContext<'a> { identity: Arc, engine_state: Cow<'a, EngineState>, stack: MutableCow<'a, Stack>, - call: Cow<'a, Call>, + call: Call<'a>, } impl<'a> PluginExecutionCommandContext<'a> { @@ -62,13 +61,13 @@ impl<'a> PluginExecutionCommandContext<'a> { identity: Arc, engine_state: &'a EngineState, stack: &'a mut Stack, - call: &'a Call, + call: &'a Call<'a>, ) -> PluginExecutionCommandContext<'a> { PluginExecutionCommandContext { identity, engine_state: Cow::Borrowed(engine_state), stack: MutableCow::Borrowed(stack), - call: Cow::Borrowed(call), + call: call.clone(), } } } @@ -217,7 +216,7 @@ impl<'a> PluginExecutionContext 
for PluginExecutionCommandContext<'a> { identity: self.identity.clone(), engine_state: Cow::Owned(self.engine_state.clone().into_owned()), stack: self.stack.owned(), - call: Cow::Owned(self.call.clone().into_owned()), + call: self.call.to_owned(), }) } } diff --git a/crates/nu-plugin-protocol/src/evaluated_call.rs b/crates/nu-plugin-protocol/src/evaluated_call.rs index 19f9049340..58f8987865 100644 --- a/crates/nu-plugin-protocol/src/evaluated_call.rs +++ b/crates/nu-plugin-protocol/src/evaluated_call.rs @@ -1,7 +1,7 @@ use nu_protocol::{ - ast::{Call, Expression}, - engine::{EngineState, Stack}, - FromValue, ShellError, Span, Spanned, Value, + ast::{self, Expression}, + engine::{Call, CallImpl, EngineState, Stack}, + ir, FromValue, ShellError, Span, Spanned, Value, }; use serde::{Deserialize, Serialize}; @@ -33,6 +33,24 @@ impl EvaluatedCall { engine_state: &EngineState, stack: &mut Stack, eval_expression_fn: fn(&EngineState, &mut Stack, &Expression) -> Result, + ) -> Result { + match &call.inner { + CallImpl::AstRef(call) => { + Self::try_from_ast_call(call, engine_state, stack, eval_expression_fn) + } + CallImpl::AstBox(call) => { + Self::try_from_ast_call(call, engine_state, stack, eval_expression_fn) + } + CallImpl::IrRef(call) => Self::try_from_ir_call(call, stack), + CallImpl::IrBox(call) => Self::try_from_ir_call(call, stack), + } + } + + fn try_from_ast_call( + call: &ast::Call, + engine_state: &EngineState, + stack: &mut Stack, + eval_expression_fn: fn(&EngineState, &mut Stack, &Expression) -> Result, ) -> Result { let positional = call.rest_iter_flattened(0, |expr| eval_expression_fn(engine_state, stack, expr))?; @@ -54,6 +72,22 @@ impl EvaluatedCall { }) } + fn try_from_ir_call(call: &ir::Call, stack: &Stack) -> Result { + let positional = call.rest_iter_flattened(stack, 0)?; + + let mut named = Vec::with_capacity(call.named_len(stack)); + named.extend( + call.named_iter(stack) + .map(|(name, value)| (name.map(|s| s.to_owned()), value.cloned())), + ); + 
+ Ok(Self { + head: call.head, + positional, + named, + }) + } + /// Check if a flag (named parameter that does not take a value) is set /// Returns Ok(true) if flag is set or passed true value /// Returns Ok(false) if flag is not set or passed false value diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index ee0f5a8221..eaa861a073 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -33,6 +33,7 @@ serde = { workspace = true, default-features = false } thiserror = "1.0" typetag = "0.2" os_pipe = { workspace = true, features = ["io_safety"] } +log = { workspace = true } [target.'cfg(unix)'.dependencies] nix = { workspace = true, default-features = false, features = ["signal"] } @@ -54,4 +55,4 @@ tempfile = { workspace = true } os_pipe = { workspace = true } [package.metadata.docs.rs] -all-features = true \ No newline at end of file +all-features = true diff --git a/crates/nu-protocol/src/alias.rs b/crates/nu-protocol/src/alias.rs index 24448225d4..8f5ea43934 100644 --- a/crates/nu-protocol/src/alias.rs +++ b/crates/nu-protocol/src/alias.rs @@ -1,6 +1,6 @@ use crate::{ - ast::{Call, Expression}, - engine::{Command, CommandType, EngineState, Stack}, + ast::Expression, + engine::{Call, Command, CommandType, EngineState, Stack}, PipelineData, ShellError, Signature, }; diff --git a/crates/nu-protocol/src/ast/block.rs b/crates/nu-protocol/src/ast/block.rs index 6e3449af26..8f62ff99ba 100644 --- a/crates/nu-protocol/src/ast/block.rs +++ b/crates/nu-protocol/src/ast/block.rs @@ -1,5 +1,5 @@ use super::Pipeline; -use crate::{engine::EngineState, OutDest, Signature, Span, Type, VarId}; +use crate::{engine::StateWorkingSet, ir::IrBlock, OutDest, Signature, Span, Type, VarId}; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -8,6 +8,8 @@ pub struct Block { pub pipelines: Vec, pub captures: Vec, pub redirect_env: bool, + /// The block compiled to IR instructions. 
Not available for subexpressions. + pub ir_block: Option, pub span: Option, // None option encodes no span to avoid using test_span() } @@ -22,10 +24,10 @@ impl Block { pub fn pipe_redirection( &self, - engine_state: &EngineState, + working_set: &StateWorkingSet, ) -> (Option, Option) { if let Some(first) = self.pipelines.first() { - first.pipe_redirection(engine_state) + first.pipe_redirection(working_set) } else { (None, None) } @@ -45,6 +47,7 @@ impl Block { pipelines: vec![], captures: vec![], redirect_env: false, + ir_block: None, span: None, } } @@ -55,6 +58,7 @@ impl Block { pipelines: Vec::with_capacity(capacity), captures: vec![], redirect_env: false, + ir_block: None, span: None, } } @@ -86,6 +90,7 @@ where pipelines: pipelines.collect(), captures: vec![], redirect_env: false, + ir_block: None, span: None, } } diff --git a/crates/nu-protocol/src/ast/expr.rs b/crates/nu-protocol/src/ast/expr.rs index 0e561e5c8f..43548d39e4 100644 --- a/crates/nu-protocol/src/ast/expr.rs +++ b/crates/nu-protocol/src/ast/expr.rs @@ -5,7 +5,9 @@ use super::{ Call, CellPath, Expression, ExternalArgument, FullCellPath, Keyword, MatchPattern, Operator, Range, Table, ValueWithUnit, }; -use crate::{ast::ImportPattern, engine::EngineState, BlockId, OutDest, Signature, Span, VarId}; +use crate::{ + ast::ImportPattern, engine::StateWorkingSet, BlockId, OutDest, Signature, Span, VarId, +}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum Expr { @@ -60,17 +62,17 @@ const _: () = assert!(std::mem::size_of::() <= 40); impl Expr { pub fn pipe_redirection( &self, - engine_state: &EngineState, + working_set: &StateWorkingSet, ) -> (Option, Option) { // Usages of `$in` will be wrapped by a `collect` call by the parser, // so we do not have to worry about that when considering // which of the expressions below may consume pipeline output. 
match self { - Expr::Call(call) => engine_state.get_decl(call.decl_id).pipe_redirection(), - Expr::Subexpression(block_id) | Expr::Block(block_id) => engine_state + Expr::Call(call) => working_set.get_decl(call.decl_id).pipe_redirection(), + Expr::Subexpression(block_id) | Expr::Block(block_id) => working_set .get_block(*block_id) - .pipe_redirection(engine_state), - Expr::FullCellPath(cell_path) => cell_path.head.expr.pipe_redirection(engine_state), + .pipe_redirection(working_set), + Expr::FullCellPath(cell_path) => cell_path.head.expr.pipe_redirection(working_set), Expr::Bool(_) | Expr::Int(_) | Expr::Float(_) diff --git a/crates/nu-protocol/src/ast/match_pattern.rs b/crates/nu-protocol/src/ast/match_pattern.rs index b8f87c3f63..1aafe84701 100644 --- a/crates/nu-protocol/src/ast/match_pattern.rs +++ b/crates/nu-protocol/src/ast/match_pattern.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct MatchPattern { pub pattern: Pattern, - pub guard: Option, + pub guard: Option>, pub span: Span, } @@ -19,7 +19,9 @@ impl MatchPattern { pub enum Pattern { Record(Vec<(String, MatchPattern)>), List(Vec), - Value(Expression), + // TODO: it would be nice if this didn't depend on AST + // maybe const evaluation can get us to a Value instead? 
+ Value(Box), Variable(VarId), Or(Vec), Rest(VarId), // the ..$foo pattern diff --git a/crates/nu-protocol/src/ast/pipeline.rs b/crates/nu-protocol/src/ast/pipeline.rs index f03c016daf..3f2a216485 100644 --- a/crates/nu-protocol/src/ast/pipeline.rs +++ b/crates/nu-protocol/src/ast/pipeline.rs @@ -1,8 +1,4 @@ -use crate::{ - ast::Expression, - engine::{EngineState, StateWorkingSet}, - OutDest, Span, -}; +use crate::{ast::Expression, engine::StateWorkingSet, OutDest, Span}; use serde::{Deserialize, Serialize}; use std::fmt::Display; @@ -120,9 +116,9 @@ impl PipelineElement { pub fn pipe_redirection( &self, - engine_state: &EngineState, + working_set: &StateWorkingSet, ) -> (Option, Option) { - self.expr.expr.pipe_redirection(engine_state) + self.expr.expr.pipe_redirection(working_set) } } @@ -166,10 +162,10 @@ impl Pipeline { pub fn pipe_redirection( &self, - engine_state: &EngineState, + working_set: &StateWorkingSet, ) -> (Option, Option) { if let Some(first) = self.elements.first() { - first.pipe_redirection(engine_state) + first.pipe_redirection(working_set) } else { (None, None) } diff --git a/crates/nu-protocol/src/engine/argument.rs b/crates/nu-protocol/src/engine/argument.rs new file mode 100644 index 0000000000..043654b761 --- /dev/null +++ b/crates/nu-protocol/src/engine/argument.rs @@ -0,0 +1,124 @@ +use std::sync::Arc; + +use crate::{ast::Expression, ir::DataSlice, Span, Value}; + +/// Represents a fully evaluated argument to a call. +#[derive(Debug, Clone)] +pub enum Argument { + /// A positional argument + Positional { + span: Span, + val: Value, + ast: Option>, + }, + /// A spread argument, e.g. `...$args` + Spread { + span: Span, + vals: Value, + ast: Option>, + }, + /// A named argument with no value, e.g. `--flag` + Flag { + data: Arc<[u8]>, + name: DataSlice, + short: DataSlice, + span: Span, + }, + /// A named argument with a value, e.g. 
`--flag value` or `--flag=` + Named { + data: Arc<[u8]>, + name: DataSlice, + short: DataSlice, + span: Span, + val: Value, + ast: Option>, + }, + /// Information generated by the parser for use by certain keyword commands + ParserInfo { + data: Arc<[u8]>, + name: DataSlice, + // TODO: rather than `Expression`, this would probably be best served by a specific enum + // type for this purpose. + info: Box, + }, +} + +impl Argument { + /// The span encompassing the argument's usage within the call, distinct from the span of the + /// actual value of the argument. + pub fn span(&self) -> Option { + match self { + Argument::Positional { span, .. } => Some(*span), + Argument::Spread { span, .. } => Some(*span), + Argument::Flag { span, .. } => Some(*span), + Argument::Named { span, .. } => Some(*span), + // Because `ParserInfo` is generated, its span shouldn't be used + Argument::ParserInfo { .. } => None, + } + } + + /// The original AST [`Expression`] for the argument's value. This is not usually available; + /// declarations have to opt-in if they require this. + pub fn ast_expression(&self) -> Option<&Arc> { + match self { + Argument::Positional { ast, .. } => ast.as_ref(), + Argument::Spread { ast, .. } => ast.as_ref(), + Argument::Flag { .. } => None, + Argument::Named { ast, .. } => ast.as_ref(), + Argument::ParserInfo { .. } => None, + } + } +} + +/// Stores the argument context for calls in IR evaluation. +#[derive(Debug, Clone)] +pub struct ArgumentStack { + arguments: Vec, +} + +impl ArgumentStack { + /// Create a new, empty argument stack. + pub const fn new() -> Self { + ArgumentStack { arguments: vec![] } + } + + /// Returns the index of the end of the argument stack. Call and save this before adding + /// arguments. + pub fn get_base(&self) -> usize { + self.arguments.len() + } + + /// Calculates the number of arguments past the given [previously retrieved](.get_base) base + /// pointer. 
+ pub fn get_len(&self, base: usize) -> usize { + self.arguments.len().checked_sub(base).unwrap_or_else(|| { + panic!( + "base ({}) is beyond the end of the arguments stack ({})", + base, + self.arguments.len() + ); + }) + } + + /// Push an argument onto the end of the argument stack. + pub fn push(&mut self, argument: Argument) { + self.arguments.push(argument); + } + + /// Clear all of the arguments after the given base index, to prepare for the next frame. + pub fn leave_frame(&mut self, base: usize) { + self.arguments.truncate(base); + } + + /// Get arguments for the frame based on the given [`base`](`.get_base()`) and + /// [`len`](`.get_len()`) parameters. + pub fn get_args(&self, base: usize, len: usize) -> &[Argument] { + &self.arguments[base..(base + len)] + } + + /// Move arguments for the frame based on the given [`base`](`.get_base()`) and + /// [`len`](`.get_len()`) parameters. + pub fn drain_args(&mut self, base: usize, len: usize) -> impl Iterator + '_ { + self.arguments.drain(base..(base + len)) + } +} diff --git a/crates/nu-protocol/src/engine/call.rs b/crates/nu-protocol/src/engine/call.rs new file mode 100644 index 0000000000..741e2bd87a --- /dev/null +++ b/crates/nu-protocol/src/engine/call.rs @@ -0,0 +1,223 @@ +use crate::{ + ast::{self, Expression}, + ir, DeclId, FromValue, ShellError, Span, Value, +}; + +use super::{EngineState, Stack, StateWorkingSet}; + +/// This is a HACK to help [`Command`](super::Command) support both the old AST evaluator and the +/// new IR evaluator at the same time. It should be removed once we are satisfied with the new +/// evaluator. +#[derive(Debug, Clone)] +pub struct Call<'a> { + pub head: Span, + pub decl_id: DeclId, + pub inner: CallImpl<'a>, +} + +#[derive(Debug, Clone)] +pub enum CallImpl<'a> { + AstRef(&'a ast::Call), + AstBox(Box), + IrRef(&'a ir::Call), + IrBox(Box), +} + +impl Call<'_> { + /// Returns a new AST call with the given span. 
This is often used by commands that need an + /// empty call to pass to a command. It's not easily possible to add anything to this. + pub fn new(span: Span) -> Self { + // this is using the boxed variant, which isn't so efficient... but this is only temporary + // anyway. + Call { + head: span, + decl_id: 0, + inner: CallImpl::AstBox(Box::new(ast::Call::new(span))), + } + } + + /// Convert the `Call` from any lifetime into `'static`, by cloning the data within onto the + /// heap. + pub fn to_owned(&self) -> Call<'static> { + Call { + head: self.head, + decl_id: self.decl_id, + inner: self.inner.to_owned(), + } + } + + /// Assert that the call is `ast::Call`, and fail with an error if it isn't. + /// + /// Provided as a stop-gap for commands that can't work with `ir::Call`, or just haven't been + /// implemented yet. Eventually these issues should be resolved and then this can be removed. + pub fn assert_ast_call(&self) -> Result<&ast::Call, ShellError> { + match &self.inner { + CallImpl::AstRef(call) => Ok(call), + CallImpl::AstBox(call) => Ok(call), + _ => Err(ShellError::NushellFailedSpanned { + msg: "Can't be used in IR context".into(), + label: "this command is not yet supported by IR evaluation".into(), + span: self.head, + }), + } + } + + /// FIXME: implementation asserts `ast::Call` and proxies to that + pub fn has_flag_const( + &self, + working_set: &StateWorkingSet, + flag_name: &str, + ) -> Result { + self.assert_ast_call()? 
+ .has_flag_const(working_set, flag_name) + } + + /// FIXME: implementation asserts `ast::Call` and proxies to that + pub fn get_flag_const( + &self, + working_set: &StateWorkingSet, + name: &str, + ) -> Result, ShellError> { + self.assert_ast_call()?.get_flag_const(working_set, name) + } + + /// FIXME: implementation asserts `ast::Call` and proxies to that + pub fn req_const( + &self, + working_set: &StateWorkingSet, + pos: usize, + ) -> Result { + self.assert_ast_call()?.req_const(working_set, pos) + } + + /// FIXME: implementation asserts `ast::Call` and proxies to that + pub fn rest_const( + &self, + working_set: &StateWorkingSet, + starting_pos: usize, + ) -> Result, ShellError> { + self.assert_ast_call()? + .rest_const(working_set, starting_pos) + } + + /// Returns a span covering the call's arguments. + pub fn arguments_span(&self) -> Span { + match &self.inner { + CallImpl::AstRef(call) => call.arguments_span(), + CallImpl::AstBox(call) => call.arguments_span(), + CallImpl::IrRef(call) => call.arguments_span(), + CallImpl::IrBox(call) => call.arguments_span(), + } + } + + /// Returns a span covering the whole call. + pub fn span(&self) -> Span { + match &self.inner { + CallImpl::AstRef(call) => call.span(), + CallImpl::AstBox(call) => call.span(), + CallImpl::IrRef(call) => call.span(), + CallImpl::IrBox(call) => call.span(), + } + } + + /// Get a parser info argument by name. + pub fn get_parser_info<'a>(&'a self, stack: &'a Stack, name: &str) -> Option<&'a Expression> { + match &self.inner { + CallImpl::AstRef(call) => call.get_parser_info(name), + CallImpl::AstBox(call) => call.get_parser_info(name), + CallImpl::IrRef(call) => call.get_parser_info(stack, name), + CallImpl::IrBox(call) => call.get_parser_info(stack, name), + } + } + + /// Evaluator-agnostic implementation of `rest_iter_flattened()`. Evaluates or gets all of the + /// positional and spread arguments, flattens spreads, and then returns one list of values. 
+ pub fn rest_iter_flattened( + &self, + engine_state: &EngineState, + stack: &mut Stack, + eval_expression: fn( + &EngineState, + &mut Stack, + &ast::Expression, + ) -> Result, + starting_pos: usize, + ) -> Result, ShellError> { + fn by_ast( + call: &ast::Call, + engine_state: &EngineState, + stack: &mut Stack, + eval_expression: fn( + &EngineState, + &mut Stack, + &ast::Expression, + ) -> Result, + starting_pos: usize, + ) -> Result, ShellError> { + call.rest_iter_flattened(starting_pos, |expr| { + eval_expression(engine_state, stack, expr) + }) + } + + fn by_ir( + call: &ir::Call, + stack: &Stack, + starting_pos: usize, + ) -> Result, ShellError> { + call.rest_iter_flattened(stack, starting_pos) + } + + match &self.inner { + CallImpl::AstRef(call) => { + by_ast(call, engine_state, stack, eval_expression, starting_pos) + } + CallImpl::AstBox(call) => { + by_ast(call, engine_state, stack, eval_expression, starting_pos) + } + CallImpl::IrRef(call) => by_ir(call, stack, starting_pos), + CallImpl::IrBox(call) => by_ir(call, stack, starting_pos), + } + } + + /// Get the original AST expression for a positional argument. 
Does not usually work for IR + /// unless the decl specified `requires_ast_for_arguments()` + pub fn positional_nth<'a>(&'a self, stack: &'a Stack, index: usize) -> Option<&'a Expression> { + match &self.inner { + CallImpl::AstRef(call) => call.positional_nth(index), + CallImpl::AstBox(call) => call.positional_nth(index), + CallImpl::IrRef(call) => call.positional_ast(stack, index).map(|arc| arc.as_ref()), + CallImpl::IrBox(call) => call.positional_ast(stack, index).map(|arc| arc.as_ref()), + } + } +} + +impl CallImpl<'_> { + pub fn to_owned(&self) -> CallImpl<'static> { + match self { + CallImpl::AstRef(call) => CallImpl::AstBox(Box::new((*call).clone())), + CallImpl::AstBox(call) => CallImpl::AstBox(call.clone()), + CallImpl::IrRef(call) => CallImpl::IrBox(Box::new((*call).clone())), + CallImpl::IrBox(call) => CallImpl::IrBox(call.clone()), + } + } +} + +impl<'a> From<&'a ast::Call> for Call<'a> { + fn from(call: &'a ast::Call) -> Self { + Call { + head: call.head, + decl_id: call.decl_id, + inner: CallImpl::AstRef(call), + } + } +} + +impl<'a> From<&'a ir::Call> for Call<'a> { + fn from(call: &'a ir::Call) -> Self { + Call { + head: call.head, + decl_id: call.decl_id, + inner: CallImpl::IrRef(call), + } + } +} diff --git a/crates/nu-protocol/src/engine/command.rs b/crates/nu-protocol/src/engine/command.rs index 043d2a66c7..48cdc4440d 100644 --- a/crates/nu-protocol/src/engine/command.rs +++ b/crates/nu-protocol/src/engine/command.rs @@ -1,5 +1,5 @@ use super::{EngineState, Stack, StateWorkingSet}; -use crate::{ast::Call, Alias, BlockId, Example, OutDest, PipelineData, ShellError, Signature}; +use crate::{engine::Call, Alias, BlockId, Example, OutDest, PipelineData, ShellError, Signature}; use std::fmt::Display; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -124,6 +124,12 @@ pub trait Command: Send + Sync + CommandClone { fn pipe_redirection(&self) -> (Option, Option) { (None, None) } + + /// Return true if the AST nodes for the arguments are required for IR 
evaluation. This is + /// currently inefficient so is not generally done. + fn requires_ast_for_arguments(&self) -> bool { + false + } } pub trait CommandClone { diff --git a/crates/nu-protocol/src/engine/error_handler.rs b/crates/nu-protocol/src/engine/error_handler.rs new file mode 100644 index 0000000000..076678be20 --- /dev/null +++ b/crates/nu-protocol/src/engine/error_handler.rs @@ -0,0 +1,55 @@ +use crate::RegId; + +/// Describes an error handler stored during IR evaluation. +#[derive(Debug, Clone, Copy)] +pub struct ErrorHandler { + /// Instruction index within the block that will handle the error + pub handler_index: usize, + /// Register to put the error information into, when an error occurs + pub error_register: Option, +} + +/// Keeps track of error handlers pushed during evaluation of an IR block. +#[derive(Debug, Clone)] +pub struct ErrorHandlerStack { + handlers: Vec, +} + +impl ErrorHandlerStack { + pub const fn new() -> ErrorHandlerStack { + ErrorHandlerStack { handlers: vec![] } + } + + /// Get the current base of the stack, which establishes a frame. + pub fn get_base(&self) -> usize { + self.handlers.len() + } + + /// Push a new error handler onto the stack. + pub fn push(&mut self, handler: ErrorHandler) { + self.handlers.push(handler); + } + + /// Try to pop an error handler from the stack. Won't go below `base`, to avoid retrieving a + /// handler belonging to a parent frame. + pub fn pop(&mut self, base: usize) -> Option { + if self.handlers.len() > base { + self.handlers.pop() + } else { + None + } + } + + /// Reset the stack to the state it was in at the beginning of the frame, in preparation to + /// return control to the parent frame. 
+ pub fn leave_frame(&mut self, base: usize) { + if self.handlers.len() >= base { + self.handlers.truncate(base); + } else { + panic!( + "ErrorHandlerStack bug: tried to leave frame at {base}, but current base is {}", + self.get_base() + ) + } + } +} diff --git a/crates/nu-protocol/src/engine/mod.rs b/crates/nu-protocol/src/engine/mod.rs index c6e71afb37..1b1762fe3c 100644 --- a/crates/nu-protocol/src/engine/mod.rs +++ b/crates/nu-protocol/src/engine/mod.rs @@ -1,8 +1,11 @@ +mod argument; mod cached_file; +mod call; mod call_info; mod capture_block; mod command; mod engine_state; +mod error_handler; mod overlay; mod pattern_match; mod stack; @@ -14,10 +17,13 @@ mod variable; pub use cached_file::CachedFile; +pub use argument::*; +pub use call::*; pub use call_info::*; pub use capture_block::*; pub use command::*; pub use engine_state::*; +pub use error_handler::*; pub use overlay::*; pub use pattern_match::*; pub use stack::*; diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index 19726db9c0..b289c1ae8b 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -1,7 +1,7 @@ use crate::{ engine::{ - EngineState, Redirection, StackCallArgGuard, StackCaptureGuard, StackIoGuard, StackOutDest, - DEFAULT_OVERLAY_NAME, + ArgumentStack, EngineState, ErrorHandlerStack, Redirection, StackCallArgGuard, + StackCaptureGuard, StackIoGuard, StackOutDest, DEFAULT_OVERLAY_NAME, }, OutDest, ShellError, Span, Value, VarId, ENV_VARIABLE_ID, NU_VARIABLE_ID, }; @@ -41,6 +41,12 @@ pub struct Stack { pub env_hidden: HashMap>, /// List of active overlays pub active_overlays: Vec, + /// Argument stack for IR evaluation + pub arguments: ArgumentStack, + /// Error handler stack for IR evaluation + pub error_handlers: ErrorHandlerStack, + /// Set true to always use IR mode + pub use_ir: bool, pub recursion_count: u64, pub parent_stack: Option>, /// Variables that have been deleted (this is used to hide values 
from parent stack lookups) @@ -68,6 +74,9 @@ impl Stack { env_vars: Vec::new(), env_hidden: HashMap::new(), active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()], + arguments: ArgumentStack::new(), + error_handlers: ErrorHandlerStack::new(), + use_ir: false, recursion_count: 0, parent_stack: None, parent_deletions: vec![], @@ -85,6 +94,9 @@ impl Stack { env_vars: parent.env_vars.clone(), env_hidden: parent.env_hidden.clone(), active_overlays: parent.active_overlays.clone(), + arguments: ArgumentStack::new(), + error_handlers: ErrorHandlerStack::new(), + use_ir: parent.use_ir, recursion_count: parent.recursion_count, vars: vec![], parent_deletions: vec![], @@ -254,6 +266,9 @@ impl Stack { env_vars, env_hidden: self.env_hidden.clone(), active_overlays: self.active_overlays.clone(), + arguments: ArgumentStack::new(), + error_handlers: ErrorHandlerStack::new(), + use_ir: self.use_ir, recursion_count: self.recursion_count, parent_stack: None, parent_deletions: vec![], @@ -284,6 +299,9 @@ impl Stack { env_vars, env_hidden: self.env_hidden.clone(), active_overlays: self.active_overlays.clone(), + arguments: ArgumentStack::new(), + error_handlers: ErrorHandlerStack::new(), + use_ir: self.use_ir, recursion_count: self.recursion_count, parent_stack: None, parent_deletions: vec![], diff --git a/crates/nu-protocol/src/engine/state_working_set.rs b/crates/nu-protocol/src/engine/state_working_set.rs index af950b8321..8c3968a824 100644 --- a/crates/nu-protocol/src/engine/state_working_set.rs +++ b/crates/nu-protocol/src/engine/state_working_set.rs @@ -4,8 +4,8 @@ use crate::{ usage::build_usage, CachedFile, Command, CommandType, EngineState, OverlayFrame, StateDelta, Variable, VirtualPath, Visibility, }, - BlockId, Category, Config, DeclId, FileId, GetSpan, Module, ModuleId, ParseError, ParseWarning, - Span, SpanId, Type, Value, VarId, VirtualPathId, + BlockId, Category, CompileError, Config, DeclId, FileId, GetSpan, Module, ModuleId, ParseError, + ParseWarning, Span, SpanId, 
Type, Value, VarId, VirtualPathId, }; use core::panic; use std::{ @@ -31,6 +31,7 @@ pub struct StateWorkingSet<'a> { pub search_predecls: bool, pub parse_errors: Vec, pub parse_warnings: Vec, + pub compile_errors: Vec, } impl<'a> StateWorkingSet<'a> { @@ -50,6 +51,7 @@ impl<'a> StateWorkingSet<'a> { search_predecls: true, parse_errors: vec![], parse_warnings: vec![], + compile_errors: vec![], } } @@ -260,6 +262,12 @@ impl<'a> StateWorkingSet<'a> { } pub fn add_block(&mut self, block: Arc) -> BlockId { + log::trace!( + "block id={} added, has IR = {:?}", + self.num_blocks(), + block.ir_block.is_some() + ); + self.delta.blocks.push(block); self.num_blocks() - 1 diff --git a/crates/nu-protocol/src/errors/cli_error.rs b/crates/nu-protocol/src/errors/cli_error.rs index 003564f933..181839b948 100644 --- a/crates/nu-protocol/src/errors/cli_error.rs +++ b/crates/nu-protocol/src/errors/cli_error.rs @@ -107,4 +107,8 @@ impl<'src> miette::Diagnostic for CliError<'src> { fn related<'a>(&'a self) -> Option + 'a>> { self.0.related() } + + fn diagnostic_source(&self) -> Option<&dyn miette::Diagnostic> { + self.0.diagnostic_source() + } } diff --git a/crates/nu-protocol/src/errors/compile_error.rs b/crates/nu-protocol/src/errors/compile_error.rs new file mode 100644 index 0000000000..cc805a73ed --- /dev/null +++ b/crates/nu-protocol/src/errors/compile_error.rs @@ -0,0 +1,238 @@ +use crate::{RegId, Span}; +use miette::Diagnostic; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +/// An internal compiler error, generally means a Nushell bug rather than an issue with user error +/// since parsing and typechecking has already passed. 
+#[derive(Debug, Clone, Error, Diagnostic, PartialEq, Serialize, Deserialize)] +pub enum CompileError { + #[error("Register overflow.")] + #[diagnostic(code(nu::compile::register_overflow))] + RegisterOverflow { + #[label("the code being compiled is probably too large")] + block_span: Option, + }, + + #[error("Register {reg_id} was uninitialized when used, possibly reused.")] + #[diagnostic( + code(nu::compile::register_uninitialized), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"), + )] + RegisterUninitialized { reg_id: RegId, caller: String }, + + #[error("Register {reg_id} was uninitialized when used, possibly reused.")] + #[diagnostic( + code(nu::compile::register_uninitialized), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"), + )] + RegisterUninitializedWhilePushingInstruction { + reg_id: RegId, + caller: String, + instruction: String, + #[label("while adding this instruction: {instruction}")] + span: Span, + }, + + #[error("Block contains too much string data: maximum 4 GiB exceeded.")] + #[diagnostic( + code(nu::compile::data_overflow), + help("try loading the string data from a file instead") + )] + DataOverflow { + #[label("while compiling this block")] + block_span: Option, + }, + + #[error("Block contains too many files.")] + #[diagnostic( + code(nu::compile::register_overflow), + help("try using fewer file redirections") + )] + FileOverflow { + #[label("while compiling this block")] + block_span: Option, + }, + + #[error("Invalid redirect mode: File should not be specified by commands.")] + #[diagnostic( + code(nu::compile::invalid_redirect_mode), + help("this is a command bug. 
Please report it at https://github.com/nushell/nushell/issues/new") + )] + InvalidRedirectMode { + #[label("while compiling this expression")] + span: Span, + }, + + #[error("Encountered garbage, likely due to parse error.")] + #[diagnostic(code(nu::compile::garbage))] + Garbage { + #[label("garbage found here")] + span: Span, + }, + + #[error("Unsupported operator expression.")] + #[diagnostic(code(nu::compile::unsupported_operator_expression))] + UnsupportedOperatorExpression { + #[label("this expression is in operator position but is not an operator")] + span: Span, + }, + + #[error("Attempted access of $env by integer path.")] + #[diagnostic(code(nu::compile::access_env_by_int))] + AccessEnvByInt { + #[label("$env keys should be strings")] + span: Span, + }, + + #[error("Encountered invalid `{keyword}` keyword call.")] + #[diagnostic(code(nu::compile::invalid_keyword_call))] + InvalidKeywordCall { + keyword: String, + #[label("this call is not properly formed")] + span: Span, + }, + + #[error("Attempted to set branch target of non-branch instruction.")] + #[diagnostic( + code(nu::compile::set_branch_target_of_non_branch_instruction), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"), + )] + SetBranchTargetOfNonBranchInstruction { + instruction: String, + #[label("tried to modify: {instruction}")] + span: Span, + }, + + /// You're trying to run an unsupported external command. + /// + /// ## Resolution + /// + /// Make sure there's an appropriate `run-external` declaration for this external command. + #[error("External calls are not supported.")] + #[diagnostic( + code(nu::compile::run_external_not_found), + help("`run-external` was not found in scope") + )] + RunExternalNotFound { + #[label("can't be run in this context")] + span: Span, + }, + + /// Invalid assignment left-hand side + /// + /// ## Resolution + /// + /// Assignment requires that you assign to a variable or variable cell path. 
+ #[error("Assignment operations require a variable.")] + #[diagnostic( + code(nu::compile::assignment_requires_variable), + help("try assigning to a variable or a cell path of a variable") + )] + AssignmentRequiresVar { + #[label("needs to be a variable")] + span: Span, + }, + + /// Invalid assignment left-hand side + /// + /// ## Resolution + /// + /// Assignment requires that you assign to a mutable variable or cell path. + #[error("Assignment to an immutable variable.")] + #[diagnostic( + code(nu::compile::assignment_requires_mutable_variable), + help("declare the variable with `mut`, or shadow it again with `let`") + )] + AssignmentRequiresMutableVar { + #[label("needs to be a mutable variable")] + span: Span, + }, + + /// This environment variable cannot be set manually. + /// + /// ## Resolution + /// + /// This environment variable is set automatically by Nushell and cannot be set manually. + #[error("{envvar_name} cannot be set manually.")] + #[diagnostic( + code(nu::compile::automatic_env_var_set_manually), + help( + r#"The environment variable '{envvar_name}' is set automatically by Nushell and cannot be set manually."# + ) + )] + AutomaticEnvVarSetManually { + envvar_name: String, + #[label("cannot set '{envvar_name}' manually")] + span: Span, + }, + + /// It is not possible to replace the entire environment at once + /// + /// ## Resolution + /// + /// Setting the entire environment is not allowed. Change environment variables individually + /// instead. 
+ #[error("Cannot replace environment.")] + #[diagnostic( + code(nu::compile::cannot_replace_env), + help("Assigning a value to '$env' is not allowed.") + )] + CannotReplaceEnv { + #[label("setting '$env' not allowed")] + span: Span, + }, + + #[error("Unexpected expression.")] + #[diagnostic(code(nu::compile::unexpected_expression))] + UnexpectedExpression { + expr_name: String, + #[label("{expr_name} is not allowed in this context")] + span: Span, + }, + + #[error("Missing required declaration: `{decl_name}`")] + #[diagnostic(code(nu::compile::missing_required_declaration))] + MissingRequiredDeclaration { + decl_name: String, + #[label("`{decl_name}` must be in scope to compile this expression")] + span: Span, + }, + + #[error("Invalid literal")] + #[diagnostic(code(nu::compile::invalid_literal))] + InvalidLiteral { + msg: String, + #[label("{msg}")] + span: Span, + }, + + #[error("{msg}")] + #[diagnostic(code(nu::compile::not_in_a_loop))] + NotInALoop { + msg: String, + #[label("can't be used outside of a loop")] + span: Option, + }, + + #[error("Incoherent loop state: the loop that ended was not the one we were expecting.")] + #[diagnostic( + code(nu::compile::incoherent_loop_state), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"), + )] + IncoherentLoopState { + #[label("while compiling this block")] + block_span: Option, + }, + + #[error("Undefined label `{label_id}`.")] + #[diagnostic( + code(nu::compile::undefined_label), + help("this is a compiler bug. 
Please report it at https://github.com/nushell/nushell/issues/new"), + )] + UndefinedLabel { + label_id: usize, + #[label("label was used while compiling this code")] + span: Option, + }, +} diff --git a/crates/nu-protocol/src/errors/mod.rs b/crates/nu-protocol/src/errors/mod.rs index 23006ab684..3f895cd65f 100644 --- a/crates/nu-protocol/src/errors/mod.rs +++ b/crates/nu-protocol/src/errors/mod.rs @@ -1,10 +1,12 @@ pub mod cli_error; +mod compile_error; mod labeled_error; mod parse_error; mod parse_warning; mod shell_error; pub use cli_error::{format_error, report_error, report_error_new}; +pub use compile_error::CompileError; pub use labeled_error::{ErrorLabel, LabeledError}; pub use parse_error::{DidYouMean, ParseError}; pub use parse_warning::ParseWarning; diff --git a/crates/nu-protocol/src/errors/shell_error.rs b/crates/nu-protocol/src/errors/shell_error.rs index be24fd093e..ab01ccfa54 100644 --- a/crates/nu-protocol/src/errors/shell_error.rs +++ b/crates/nu-protocol/src/errors/shell_error.rs @@ -1376,6 +1376,23 @@ On Windows, this would be %USERPROFILE%\AppData\Roaming"# help("Set XDG_CONFIG_HOME to an absolute path, or set it to an empty string to ignore it") )] InvalidXdgConfig { xdg: String, default: String }, + + /// An unexpected error occurred during IR evaluation. + /// + /// ## Resolution + /// + /// This is most likely a correctness issue with the IR compiler or evaluator. Please file a + /// bug with the minimum code needed to reproduce the issue, if possible. 
+ #[error("IR evaluation error: {msg}")] + #[diagnostic( + code(nu::shell::ir_eval_error), + help("this is a bug, please report it at https://github.com/nushell/nushell/issues/new along with the code you were running if able") + )] + IrEvalError { + msg: String, + #[label = "while running this code"] + span: Option, + }, } // TODO: Implement as From trait diff --git a/crates/nu-protocol/src/eval_const.rs b/crates/nu-protocol/src/eval_const.rs index 87913e4ee3..5393e35e59 100644 --- a/crates/nu-protocol/src/eval_const.rs +++ b/crates/nu-protocol/src/eval_const.rs @@ -307,7 +307,7 @@ fn eval_const_call( return Err(ShellError::NotAConstHelp { span: call.head }); } - decl.run_const(working_set, call, input) + decl.run_const(working_set, &call.into(), input) } pub fn eval_const_subexpression( diff --git a/crates/nu-protocol/src/id.rs b/crates/nu-protocol/src/id.rs index 73c4f52e70..829ee8f36d 100644 --- a/crates/nu-protocol/src/id.rs +++ b/crates/nu-protocol/src/id.rs @@ -7,5 +7,19 @@ pub type ModuleId = usize; pub type OverlayId = usize; pub type FileId = usize; pub type VirtualPathId = usize; + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] pub struct SpanId(pub usize); // more robust ID style used in the new parser + +/// An ID for an [IR](crate::ir) register. `%n` is a common shorthand for `RegId(n)`. +/// +/// Note: `%0` is allocated with the block input at the beginning of a compiled block. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +#[repr(transparent)] +pub struct RegId(pub u32); + +impl std::fmt::Display for RegId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "%{}", self.0) + } +} diff --git a/crates/nu-protocol/src/ir/call.rs b/crates/nu-protocol/src/ir/call.rs new file mode 100644 index 0000000000..3d16f82eb6 --- /dev/null +++ b/crates/nu-protocol/src/ir/call.rs @@ -0,0 +1,351 @@ +use std::sync::Arc; + +use crate::{ + ast::Expression, + engine::{self, Argument, Stack}, + DeclId, ShellError, Span, Spanned, Value, +}; + +use super::DataSlice; + +/// Contains the information for a call being made to a declared command. +#[derive(Debug, Clone)] +pub struct Call { + /// The declaration ID of the command to be invoked. + pub decl_id: DeclId, + /// The span encompassing the command name, before the arguments. + pub head: Span, + /// The span encompassing the command name and all arguments. + pub span: Span, + /// The base index of the arguments for this call within the + /// [argument stack](crate::engine::ArgumentStack). + pub args_base: usize, + /// The number of [`Argument`]s for the call. Note that this just counts the number of + /// `Argument` entries on the stack, and has nothing to do with the actual number of positional + /// or spread arguments. + pub args_len: usize, +} + +impl Call { + /// Build a new call with arguments. + pub fn build(decl_id: DeclId, head: Span) -> CallBuilder { + CallBuilder { + inner: Call { + decl_id, + head, + span: head, + args_base: 0, + args_len: 0, + }, + } + } + + /// Get the arguments for this call from the arguments stack. 
+ pub fn arguments<'a>(&self, stack: &'a Stack) -> &'a [Argument] { + stack.arguments.get_args(self.args_base, self.args_len) + } + + /// The span encompassing the arguments + /// + /// If there are no arguments the span covers where the first argument would exist + /// + /// If there are one or more arguments the span encompasses the start of the first argument to + /// end of the last argument + pub fn arguments_span(&self) -> Span { + let past = self.head.past(); + Span::new(past.start, self.span.end) + } + + /// The number of named arguments, with or without values. + pub fn named_len(&self, stack: &Stack) -> usize { + self.arguments(stack) + .iter() + .filter(|arg| matches!(arg, Argument::Named { .. } | Argument::Flag { .. })) + .count() + } + + /// Iterate through named arguments, with or without values. + pub fn named_iter<'a>( + &'a self, + stack: &'a Stack, + ) -> impl Iterator, Option<&'a Value>)> + 'a { + self.arguments(stack).iter().filter_map( + |arg: &Argument| -> Option<(Spanned<&str>, Option<&Value>)> { + match arg { + Argument::Flag { + data, name, span, .. + } => Some(( + Spanned { + item: std::str::from_utf8(&data[*name]).expect("invalid arg name"), + span: *span, + }, + None, + )), + Argument::Named { + data, + name, + span, + val, + .. + } => Some(( + Spanned { + item: std::str::from_utf8(&data[*name]).expect("invalid arg name"), + span: *span, + }, + Some(val), + )), + _ => None, + } + }, + ) + } + + /// Get a named argument's value by name. Returns [`None`] for named arguments with no value as + /// well. + pub fn get_named_arg<'a>(&self, stack: &'a Stack, flag_name: &str) -> Option<&'a Value> { + // Optimized to avoid str::from_utf8() + self.arguments(stack) + .iter() + .find_map(|arg: &Argument| -> Option> { + match arg { + Argument::Flag { data, name, .. } if &data[*name] == flag_name.as_bytes() => { + Some(None) + } + Argument::Named { + data, name, val, .. 
+ } if &data[*name] == flag_name.as_bytes() => Some(Some(val)), + _ => None, + } + }) + .flatten() + } + + /// The number of positional arguments, excluding spread arguments. + pub fn positional_len(&self, stack: &Stack) -> usize { + self.arguments(stack) + .iter() + .filter(|arg| matches!(arg, Argument::Positional { .. })) + .count() + } + + /// Iterate through positional arguments. Does not include spread arguments. + pub fn positional_iter<'a>(&self, stack: &'a Stack) -> impl Iterator { + self.arguments(stack).iter().filter_map(|arg| match arg { + Argument::Positional { val, .. } => Some(val), + _ => None, + }) + } + + /// Get a positional argument by index. Does not include spread arguments. + pub fn positional_nth<'a>(&self, stack: &'a Stack, index: usize) -> Option<&'a Value> { + self.positional_iter(stack).nth(index) + } + + /// Get the AST node for a positional argument by index. Not usually available unless the decl + /// required it. + pub fn positional_ast<'a>( + &self, + stack: &'a Stack, + index: usize, + ) -> Option<&'a Arc> { + self.arguments(stack) + .iter() + .filter_map(|arg| match arg { + Argument::Positional { ast, .. } => Some(ast), + _ => None, + }) + .nth(index) + .and_then(|option| option.as_ref()) + } + + /// Returns every argument to the rest parameter, as well as whether each argument + /// is spread or a normal positional argument (true for spread, false for normal) + pub fn rest_iter<'a>( + &self, + stack: &'a Stack, + start: usize, + ) -> impl Iterator + 'a { + self.arguments(stack) + .iter() + .filter_map(|arg| match arg { + Argument::Positional { val, .. } => Some((val, false)), + Argument::Spread { vals, .. } => Some((vals, true)), + _ => None, + }) + .skip(start) + } + + /// Returns all of the positional arguments including and after `start`, with spread arguments + /// flattened into a single `Vec`. 
+ pub fn rest_iter_flattened( + &self, + stack: &Stack, + start: usize, + ) -> Result, ShellError> { + let mut acc = vec![]; + for (rest_val, spread) in self.rest_iter(stack, start) { + if spread { + match rest_val { + Value::List { vals, .. } => acc.extend(vals.iter().cloned()), + Value::Error { error, .. } => return Err(ShellError::clone(error)), + _ => { + return Err(ShellError::CannotSpreadAsList { + span: rest_val.span(), + }) + } + } + } else { + acc.push(rest_val.clone()); + } + } + Ok(acc) + } + + /// Get a parser info argument by name. + pub fn get_parser_info<'a>(&self, stack: &'a Stack, name: &str) -> Option<&'a Expression> { + self.arguments(stack) + .iter() + .find_map(|argument| match argument { + Argument::ParserInfo { + data, + name: name_slice, + info: expr, + } if &data[*name_slice] == name.as_bytes() => Some(expr.as_ref()), + _ => None, + }) + } + + /// Returns a span encompassing the entire call. + pub fn span(&self) -> Span { + self.span + } + + /// Resets the [`Stack`] to its state before the call was made. + pub fn leave(&self, stack: &mut Stack) { + stack.arguments.leave_frame(self.args_base); + } +} + +/// Utility struct for building a [`Call`] with arguments on the [`Stack`]. +pub struct CallBuilder { + inner: Call, +} + +impl CallBuilder { + /// Add an argument to the [`Stack`] and reference it from the [`Call`]. + pub fn add_argument(&mut self, stack: &mut Stack, argument: Argument) -> &mut Self { + if self.inner.args_len == 0 { + self.inner.args_base = stack.arguments.get_base(); + } + self.inner.args_len += 1; + if let Some(span) = argument.span() { + self.inner.span = self.inner.span.append(span); + } + stack.arguments.push(argument); + self + } + + /// Add a positional argument to the [`Stack`] and reference it from the [`Call`]. 
+ pub fn add_positional(&mut self, stack: &mut Stack, span: Span, val: Value) -> &mut Self { + self.add_argument( + stack, + Argument::Positional { + span, + val, + ast: None, + }, + ) + } + + /// Add a spread argument to the [`Stack`] and reference it from the [`Call`]. + pub fn add_spread(&mut self, stack: &mut Stack, span: Span, vals: Value) -> &mut Self { + self.add_argument( + stack, + Argument::Spread { + span, + vals, + ast: None, + }, + ) + } + + /// Add a flag (no-value named) argument to the [`Stack`] and reference it from the [`Call`]. + pub fn add_flag( + &mut self, + stack: &mut Stack, + name: impl AsRef, + short: impl AsRef, + span: Span, + ) -> &mut Self { + let (data, name, short) = data_from_name_and_short(name.as_ref(), short.as_ref()); + self.add_argument( + stack, + Argument::Flag { + data, + name, + short, + span, + }, + ) + } + + /// Add a named argument to the [`Stack`] and reference it from the [`Call`]. + pub fn add_named( + &mut self, + stack: &mut Stack, + name: impl AsRef, + short: impl AsRef, + span: Span, + val: Value, + ) -> &mut Self { + let (data, name, short) = data_from_name_and_short(name.as_ref(), short.as_ref()); + self.add_argument( + stack, + Argument::Named { + data, + name, + short, + span, + val, + ast: None, + }, + ) + } + + /// Produce the finished [`Call`] from the builder. + /// + /// The call should be entered / run before any other calls are constructed, because the + /// argument stack will be reset when they exit. + pub fn finish(&self) -> Call { + self.inner.clone() + } + + /// Run a closure with the [`Call`] as an [`engine::Call`] reference, and then clean up the + /// arguments that were added to the [`Stack`] after. + /// + /// For convenience. Calls [`Call::leave`] after the closure ends. 
+ pub fn with( + self, + stack: &mut Stack, + f: impl FnOnce(&mut Stack, &engine::Call<'_>) -> T, + ) -> T { + let call = engine::Call::from(&self.inner); + let result = f(stack, &call); + self.inner.leave(stack); + result + } +} + +fn data_from_name_and_short(name: &str, short: &str) -> (Arc<[u8]>, DataSlice, DataSlice) { + let data: Vec = name.bytes().chain(short.bytes()).collect(); + let data: Arc<[u8]> = data.into(); + let name = DataSlice { + start: 0, + len: name.len().try_into().expect("flag name too big"), + }; + let short = DataSlice { + start: name.start.checked_add(name.len).expect("flag name too big"), + len: short.len().try_into().expect("flag short name too big"), + }; + (data, name, short) +} diff --git a/crates/nu-protocol/src/ir/display.rs b/crates/nu-protocol/src/ir/display.rs new file mode 100644 index 0000000000..c28323cca4 --- /dev/null +++ b/crates/nu-protocol/src/ir/display.rs @@ -0,0 +1,452 @@ +use std::fmt; + +use crate::{ast::Pattern, engine::EngineState, DeclId, VarId}; + +use super::{DataSlice, Instruction, IrBlock, Literal, RedirectMode}; + +pub struct FmtIrBlock<'a> { + pub(super) engine_state: &'a EngineState, + pub(super) ir_block: &'a IrBlock, +} + +impl<'a> fmt::Display for FmtIrBlock<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let plural = |count| if count == 1 { "" } else { "s" }; + writeln!( + f, + "# {} register{}, {} instruction{}, {} byte{} of data", + self.ir_block.register_count, + plural(self.ir_block.register_count as usize), + self.ir_block.instructions.len(), + plural(self.ir_block.instructions.len()), + self.ir_block.data.len(), + plural(self.ir_block.data.len()), + )?; + if self.ir_block.file_count > 0 { + writeln!( + f, + "# {} file{} used for redirection", + self.ir_block.file_count, + plural(self.ir_block.file_count as usize) + )?; + } + for (index, instruction) in self.ir_block.instructions.iter().enumerate() { + let formatted = format!( + "{:-4}: {}", + index, + FmtInstruction { + 
engine_state: self.engine_state, + instruction, + data: &self.ir_block.data, + } + ); + let comment = &self.ir_block.comments[index]; + if comment.is_empty() { + writeln!(f, "{formatted}")?; + } else { + writeln!(f, "{formatted:40} # {comment}")?; + } + } + Ok(()) + } +} + +pub struct FmtInstruction<'a> { + pub(super) engine_state: &'a EngineState, + pub(super) instruction: &'a Instruction, + pub(super) data: &'a [u8], +} + +impl<'a> fmt::Display for FmtInstruction<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const WIDTH: usize = 22; + + match self.instruction { + Instruction::Unreachable => { + write!(f, "{:WIDTH$}", "unreachable") + } + Instruction::LoadLiteral { dst, lit } => { + let lit = FmtLiteral { + literal: lit, + data: self.data, + }; + write!(f, "{:WIDTH$} {dst}, {lit}", "load-literal") + } + Instruction::LoadValue { dst, val } => { + let val = val.to_debug_string(); + write!(f, "{:WIDTH$} {dst}, {val}", "load-value") + } + Instruction::Move { dst, src } => { + write!(f, "{:WIDTH$} {dst}, {src}", "move") + } + Instruction::Clone { dst, src } => { + write!(f, "{:WIDTH$} {dst}, {src}", "clone") + } + Instruction::Collect { src_dst } => { + write!(f, "{:WIDTH$} {src_dst}", "collect") + } + Instruction::Span { src_dst } => { + write!(f, "{:WIDTH$} {src_dst}", "span") + } + Instruction::Drop { src } => { + write!(f, "{:WIDTH$} {src}", "drop") + } + Instruction::Drain { src } => { + write!(f, "{:WIDTH$} {src}", "drain") + } + Instruction::LoadVariable { dst, var_id } => { + let var = FmtVar::new(self.engine_state, *var_id); + write!(f, "{:WIDTH$} {dst}, {var}", "load-variable") + } + Instruction::StoreVariable { var_id, src } => { + let var = FmtVar::new(self.engine_state, *var_id); + write!(f, "{:WIDTH$} {var}, {src}", "store-variable") + } + Instruction::LoadEnv { dst, key } => { + let key = FmtData(self.data, *key); + write!(f, "{:WIDTH$} {dst}, {key}", "load-env") + } + Instruction::LoadEnvOpt { dst, key } => { + let key = 
FmtData(self.data, *key); + write!(f, "{:WIDTH$} {dst}, {key}", "load-env-opt") + } + Instruction::StoreEnv { key, src } => { + let key = FmtData(self.data, *key); + write!(f, "{:WIDTH$} {key}, {src}", "store-env") + } + Instruction::PushPositional { src } => { + write!(f, "{:WIDTH$} {src}", "push-positional") + } + Instruction::AppendRest { src } => { + write!(f, "{:WIDTH$} {src}", "append-rest") + } + Instruction::PushFlag { name } => { + let name = FmtData(self.data, *name); + write!(f, "{:WIDTH$} {name}", "push-flag") + } + Instruction::PushShortFlag { short } => { + let short = FmtData(self.data, *short); + write!(f, "{:WIDTH$} {short}", "push-short-flag") + } + Instruction::PushNamed { name, src } => { + let name = FmtData(self.data, *name); + write!(f, "{:WIDTH$} {name}, {src}", "push-named") + } + Instruction::PushShortNamed { short, src } => { + let short = FmtData(self.data, *short); + write!(f, "{:WIDTH$} {short}, {src}", "push-short-named") + } + Instruction::PushParserInfo { name, info } => { + let name = FmtData(self.data, *name); + write!(f, "{:WIDTH$} {name}, {info:?}", "push-parser-info") + } + Instruction::RedirectOut { mode } => { + write!(f, "{:WIDTH$} {mode}", "redirect-out") + } + Instruction::RedirectErr { mode } => { + write!(f, "{:WIDTH$} {mode}", "redirect-err") + } + Instruction::CheckErrRedirected { src } => { + write!(f, "{:WIDTH$} {src}", "check-err-redirected") + } + Instruction::OpenFile { + file_num, + path, + append, + } => { + write!( + f, + "{:WIDTH$} file({file_num}), {path}, append = {append:?}", + "open-file" + ) + } + Instruction::WriteFile { file_num, src } => { + write!(f, "{:WIDTH$} file({file_num}), {src}", "write-file") + } + Instruction::CloseFile { file_num } => { + write!(f, "{:WIDTH$} file({file_num})", "close-file") + } + Instruction::Call { decl_id, src_dst } => { + let decl = FmtDecl::new(self.engine_state, *decl_id); + write!(f, "{:WIDTH$} {decl}, {src_dst}", "call") + } + Instruction::StringAppend { src_dst, val 
} => { + write!(f, "{:WIDTH$} {src_dst}, {val}", "string-append") + } + Instruction::GlobFrom { src_dst, no_expand } => { + let no_expand = if *no_expand { "no-expand" } else { "expand" }; + write!(f, "{:WIDTH$} {src_dst}, {no_expand}", "glob-from",) + } + Instruction::ListPush { src_dst, item } => { + write!(f, "{:WIDTH$} {src_dst}, {item}", "list-push") + } + Instruction::ListSpread { src_dst, items } => { + write!(f, "{:WIDTH$} {src_dst}, {items}", "list-spread") + } + Instruction::RecordInsert { src_dst, key, val } => { + write!(f, "{:WIDTH$} {src_dst}, {key}, {val}", "record-insert") + } + Instruction::RecordSpread { src_dst, items } => { + write!(f, "{:WIDTH$} {src_dst}, {items}", "record-spread") + } + Instruction::Not { src_dst } => { + write!(f, "{:WIDTH$} {src_dst}", "not") + } + Instruction::BinaryOp { lhs_dst, op, rhs } => { + write!(f, "{:WIDTH$} {lhs_dst}, {op:?}, {rhs}", "binary-op") + } + Instruction::FollowCellPath { src_dst, path } => { + write!(f, "{:WIDTH$} {src_dst}, {path}", "follow-cell-path") + } + Instruction::CloneCellPath { dst, src, path } => { + write!(f, "{:WIDTH$} {dst}, {src}, {path}", "clone-cell-path") + } + Instruction::UpsertCellPath { + src_dst, + path, + new_value, + } => { + write!( + f, + "{:WIDTH$} {src_dst}, {path}, {new_value}", + "upsert-cell-path" + ) + } + Instruction::Jump { index } => { + write!(f, "{:WIDTH$} {index}", "jump") + } + Instruction::BranchIf { cond, index } => { + write!(f, "{:WIDTH$} {cond}, {index}", "branch-if") + } + Instruction::BranchIfEmpty { src, index } => { + write!(f, "{:WIDTH$} {src}, {index}", "branch-if-empty") + } + Instruction::Match { + pattern, + src, + index, + } => { + let pattern = FmtPattern { + engine_state: self.engine_state, + pattern, + }; + write!(f, "{:WIDTH$} ({pattern}), {src}, {index}", "match") + } + Instruction::CheckMatchGuard { src } => { + write!(f, "{:WIDTH$} {src}", "check-match-guard") + } + Instruction::Iterate { + dst, + stream, + end_index, + } => { + write!(f, 
"{:WIDTH$} {dst}, {stream}, end {end_index}", "iterate") + } + Instruction::OnError { index } => { + write!(f, "{:WIDTH$} {index}", "on-error") + } + Instruction::OnErrorInto { index, dst } => { + write!(f, "{:WIDTH$} {index}, {dst}", "on-error-into") + } + Instruction::PopErrorHandler => { + write!(f, "{:WIDTH$}", "pop-error-handler") + } + Instruction::CheckExternalFailed { dst, src } => { + write!(f, "{:WIDTH$} {dst}, {src}", "check-external-failed") + } + Instruction::ReturnEarly { src } => { + write!(f, "{:WIDTH$} {src}", "return-early") + } + Instruction::Return { src } => { + write!(f, "{:WIDTH$} {src}", "return") + } + } + } +} + +struct FmtDecl<'a>(DeclId, &'a str); + +impl<'a> FmtDecl<'a> { + fn new(engine_state: &'a EngineState, decl_id: DeclId) -> Self { + FmtDecl(decl_id, engine_state.get_decl(decl_id).name()) + } +} + +impl fmt::Display for FmtDecl<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "decl {} {:?}", self.0, self.1) + } +} + +struct FmtVar<'a>(DeclId, Option<&'a str>); + +impl<'a> FmtVar<'a> { + fn new(engine_state: &'a EngineState, var_id: VarId) -> Self { + // Search for the name of the variable + let name: Option<&str> = engine_state + .active_overlays(&[]) + .flat_map(|overlay| overlay.vars.iter()) + .find(|(_, v)| **v == var_id) + .map(|(k, _)| std::str::from_utf8(k).unwrap_or("")); + FmtVar(var_id, name) + } +} + +impl fmt::Display for FmtVar<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(name) = self.1 { + write!(f, "var {} {:?}", self.0, name) + } else { + write!(f, "var {}", self.0) + } + } +} + +impl fmt::Display for RedirectMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RedirectMode::Pipe => write!(f, "pipe"), + RedirectMode::Capture => write!(f, "capture"), + RedirectMode::Null => write!(f, "null"), + RedirectMode::Inherit => write!(f, "inherit"), + RedirectMode::File { file_num } => write!(f, "file({file_num})"), + 
RedirectMode::Caller => write!(f, "caller"), + } + } +} + +struct FmtData<'a>(&'a [u8], DataSlice); + +impl<'a> fmt::Display for FmtData<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Ok(s) = std::str::from_utf8(&self.0[self.1]) { + // Write as string + write!(f, "{s:?}") + } else { + // Write as byte array + write!(f, "0x{:x?}", self.0) + } + } +} + +struct FmtLiteral<'a> { + literal: &'a Literal, + data: &'a [u8], +} + +impl<'a> fmt::Display for FmtLiteral<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.literal { + Literal::Bool(b) => write!(f, "bool({b:?})"), + Literal::Int(i) => write!(f, "int({i:?})"), + Literal::Float(fl) => write!(f, "float({fl:?})"), + Literal::Filesize(q) => write!(f, "filesize({q}b)"), + Literal::Duration(q) => write!(f, "duration({q}ns)"), + Literal::Binary(b) => write!(f, "binary({})", FmtData(self.data, *b)), + Literal::Block(id) => write!(f, "block({id})"), + Literal::Closure(id) => write!(f, "closure({id})"), + Literal::RowCondition(id) => write!(f, "row_condition({id})"), + Literal::Range { + start, + step, + end, + inclusion, + } => write!(f, "range({start}, {step}, {end}, {inclusion:?})"), + Literal::List { capacity } => write!(f, "list(capacity = {capacity})"), + Literal::Record { capacity } => write!(f, "record(capacity = {capacity})"), + Literal::Filepath { val, no_expand } => write!( + f, + "filepath({}, no_expand = {no_expand:?})", + FmtData(self.data, *val) + ), + Literal::Directory { val, no_expand } => write!( + f, + "directory({}, no_expand = {no_expand:?})", + FmtData(self.data, *val) + ), + Literal::GlobPattern { val, no_expand } => write!( + f, + "glob-pattern({}, no_expand = {no_expand:?})", + FmtData(self.data, *val) + ), + Literal::String(s) => write!(f, "string({})", FmtData(self.data, *s)), + Literal::RawString(rs) => write!(f, "raw-string({})", FmtData(self.data, *rs)), + Literal::CellPath(p) => write!(f, "cell-path({p})"), + Literal::Date(dt) => 
write!(f, "date({dt})"), + Literal::Nothing => write!(f, "nothing"), + } + } +} + +struct FmtPattern<'a> { + engine_state: &'a EngineState, + pattern: &'a Pattern, +} + +impl<'a> fmt::Display for FmtPattern<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.pattern { + Pattern::Record(bindings) => { + f.write_str("{")?; + for (name, pattern) in bindings { + write!( + f, + "{}: {}", + name, + FmtPattern { + engine_state: self.engine_state, + pattern: &pattern.pattern, + } + )?; + } + f.write_str("}") + } + Pattern::List(bindings) => { + f.write_str("[")?; + for pattern in bindings { + write!( + f, + "{}", + FmtPattern { + engine_state: self.engine_state, + pattern: &pattern.pattern + } + )?; + } + f.write_str("]") + } + Pattern::Value(expr) => { + let string = + String::from_utf8_lossy(self.engine_state.get_span_contents(expr.span)); + f.write_str(&string) + } + Pattern::Variable(var_id) => { + let variable = FmtVar::new(self.engine_state, *var_id); + write!(f, "{}", variable) + } + Pattern::Or(patterns) => { + for (index, pattern) in patterns.iter().enumerate() { + if index > 0 { + f.write_str(" | ")?; + } + write!( + f, + "{}", + FmtPattern { + engine_state: self.engine_state, + pattern: &pattern.pattern + } + )?; + } + Ok(()) + } + Pattern::Rest(var_id) => { + let variable = FmtVar::new(self.engine_state, *var_id); + write!(f, "..{}", variable) + } + Pattern::IgnoreRest => f.write_str(".."), + Pattern::IgnoreValue => f.write_str("_"), + Pattern::Garbage => f.write_str(""), + } + } +} diff --git a/crates/nu-protocol/src/ir/mod.rs b/crates/nu-protocol/src/ir/mod.rs new file mode 100644 index 0000000000..28677b743c --- /dev/null +++ b/crates/nu-protocol/src/ir/mod.rs @@ -0,0 +1,419 @@ +use std::{fmt, sync::Arc}; + +use crate::{ + ast::{CellPath, Expression, Operator, Pattern, RangeInclusion}, + engine::EngineState, + BlockId, DeclId, RegId, Span, Value, VarId, +}; + +use chrono::{DateTime, FixedOffset}; +use serde::{Deserialize, 
Serialize}; + +mod call; +mod display; + +pub use call::*; +pub use display::{FmtInstruction, FmtIrBlock}; + +#[derive(Clone, Serialize, Deserialize)] +pub struct IrBlock { + pub instructions: Vec, + pub spans: Vec, + #[serde(with = "serde_arc_u8_array")] + pub data: Arc<[u8]>, + pub ast: Vec>, + /// Additional information that can be added to help with debugging + pub comments: Vec>, + pub register_count: u32, + pub file_count: u32, +} + +impl fmt::Debug for IrBlock { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // the ast field is too verbose and doesn't add much + f.debug_struct("IrBlock") + .field("instructions", &self.instructions) + .field("spans", &self.spans) + .field("data", &self.data) + .field("comments", &self.comments) + .field("register_count", &self.register_count) + .field("file_count", &self.register_count) + .finish_non_exhaustive() + } +} + +impl IrBlock { + /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed + /// listing of the instructions contained within this [`IrBlock`]. + pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtIrBlock<'a> { + FmtIrBlock { + engine_state, + ir_block: self, + } + } +} + +/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store +/// string data that a block uses. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub struct DataSlice { + pub start: u32, + pub len: u32, +} + +impl DataSlice { + /// A data slice that contains no data. This slice is always valid. + pub const fn empty() -> DataSlice { + DataSlice { start: 0, len: 0 } + } +} + +impl std::ops::Index for [u8] { + type Output = [u8]; + + fn index(&self, index: DataSlice) -> &Self::Output { + &self[index.start as usize..(index.start as usize + index.len as usize)] + } +} + +/// A possible reference into the abstract syntax tree for an instruction. 
This is not present for +/// most instructions and is just added when needed. +#[derive(Debug, Clone)] +pub struct IrAstRef(pub Arc); + +impl Serialize for IrAstRef { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.0.as_ref().serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for IrAstRef { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + Expression::deserialize(deserializer).map(|expr| IrAstRef(Arc::new(expr))) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Instruction { + /// Unreachable code path (error) + Unreachable, + /// Load a literal value into the `dst` register + LoadLiteral { dst: RegId, lit: Literal }, + /// Load a clone of a boxed value into the `dst` register (e.g. from const evaluation) + LoadValue { dst: RegId, val: Box }, + /// Move a register. Value is taken from `src` (used by this instruction). + Move { dst: RegId, src: RegId }, + /// Copy a register (must be a collected value). Value is still in `src` after this instruction. + Clone { dst: RegId, src: RegId }, + /// Collect a stream in a register to a value + Collect { src_dst: RegId }, + /// Change the span of the contents of a register to the span of this instruction. + Span { src_dst: RegId }, + /// Drop the value/stream in a register, without draining + Drop { src: RegId }, + /// Drain the value/stream in a register and discard (e.g. semicolon). + /// + /// If passed a stream from an external command, sets $env.LAST_EXIT_CODE to the resulting exit + /// code, and invokes any available error handler with Empty, or if not available, returns an + /// exit-code-only stream, leaving the block. 
+ Drain { src: RegId }, + /// Load the value of a variable into the `dst` register + LoadVariable { dst: RegId, var_id: VarId }, + /// Store the value of a variable from the `src` register + StoreVariable { var_id: VarId, src: RegId }, + /// Load the value of an environment variable into the `dst` register + LoadEnv { dst: RegId, key: DataSlice }, + /// Load the value of an environment variable into the `dst` register, or `Nothing` if it + /// doesn't exist + LoadEnvOpt { dst: RegId, key: DataSlice }, + /// Store the value of an environment variable from the `src` register + StoreEnv { key: DataSlice, src: RegId }, + /// Add a positional arg to the next (internal) call. + PushPositional { src: RegId }, + /// Add a list of args to the next (internal) call (spread/rest). + AppendRest { src: RegId }, + /// Add a named arg with no value to the next (internal) call. + PushFlag { name: DataSlice }, + /// Add a short named arg with no value to the next (internal) call. + PushShortFlag { short: DataSlice }, + /// Add a named arg with a value to the next (internal) call. + PushNamed { name: DataSlice, src: RegId }, + /// Add a short named arg with a value to the next (internal) call. + PushShortNamed { short: DataSlice, src: RegId }, + /// Add parser info to the next (internal) call. + PushParserInfo { + name: DataSlice, + info: Box, + }, + /// Set the redirection for stdout for the next call (only). + /// + /// The register for a file redirection is not consumed. + RedirectOut { mode: RedirectMode }, + /// Set the redirection for stderr for the next call (only). + /// + /// The register for a file redirection is not consumed. + RedirectErr { mode: RedirectMode }, + /// Throw an error if stderr wasn't redirected in the given stream. `src` is preserved. + CheckErrRedirected { src: RegId }, + /// Open a file for redirection, pushing it onto the file stack. + OpenFile { + file_num: u32, + path: RegId, + append: bool, + }, + /// Write data from the register to a file. 
This is done to finish a file redirection, in case + /// an internal command or expression was evaluated rather than an external one. + WriteFile { file_num: u32, src: RegId }, + /// Pop a file used for redirection from the file stack. + CloseFile { file_num: u32 }, + /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`, + /// overwriting it. The argument stack is used implicitly and cleared when the call ends. + Call { decl_id: DeclId, src_dst: RegId }, + /// Append a value onto the end of a string. Uses `to_expanded_string(", ", ...)` on the value. + /// Used for string interpolation literals. Not the same thing as the `++` operator. + StringAppend { src_dst: RegId, val: RegId }, + /// Convert a string into a glob. Used for glob interpolation and setting glob variables. If the + /// value is already a glob, it won't be modified (`no_expand` will have no effect). + GlobFrom { src_dst: RegId, no_expand: bool }, + /// Push a value onto the end of a list. Used to construct list literals. + ListPush { src_dst: RegId, item: RegId }, + /// Spread a value onto the end of a list. Used to construct list literals. + ListSpread { src_dst: RegId, items: RegId }, + /// Insert a key-value pair into a record. Used to construct record literals. Raises an error if + /// the key already existed in the record. + RecordInsert { + src_dst: RegId, + key: RegId, + val: RegId, + }, + /// Spread a record onto a record. Used to construct record literals. Any existing value for the + /// key is overwritten. + RecordSpread { src_dst: RegId, items: RegId }, + /// Negate a boolean. + Not { src_dst: RegId }, + /// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to + /// `lhs_dst`. 
+ BinaryOp { + lhs_dst: RegId, + op: Operator, + rhs: RegId, + }, + /// Follow a cell path on the value in `src_dst`, storing the result back to `src_dst` + FollowCellPath { src_dst: RegId, path: RegId }, + /// Clone the value at a cell path in `src`, storing the result to `dst`. The original value + /// remains in `src`. Must be a collected value. + CloneCellPath { dst: RegId, src: RegId, path: RegId }, + /// Update/insert a cell path to `new_value` on the value in `src_dst`, storing the modified + /// value back to `src_dst` + UpsertCellPath { + src_dst: RegId, + path: RegId, + new_value: RegId, + }, + /// Jump to an offset in this block + Jump { index: usize }, + /// Branch to an offset in this block if the value of the `cond` register is a true boolean, + /// otherwise continue execution + BranchIf { cond: RegId, index: usize }, + /// Branch to an offset in this block if the value of the `src` register is Empty or Nothing, + /// otherwise continue execution. The original value in `src` is preserved. + BranchIfEmpty { src: RegId, index: usize }, + /// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any + /// variables captured by the pattern. If the pattern doesn't match, continue execution. The + /// original value is preserved in `src` through this instruction. + Match { + pattern: Box, + src: RegId, + index: usize, + }, + /// Check that a match guard is a boolean, throwing + /// [`MatchGuardNotBool`](crate::ShellError::MatchGuardNotBool) if it isn't. Preserves `src`. + CheckMatchGuard { src: RegId }, + /// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to + /// `end_index` if the iterator is finished + Iterate { + dst: RegId, + stream: RegId, + end_index: usize, + }, + /// Push an error handler, without capturing the error value + OnError { index: usize }, + /// Push an error handler, capturing the error value into `dst`. 
If the error handler is not + /// called, the register should be freed manually. + OnErrorInto { index: usize, dst: RegId }, + /// Pop an error handler. This is not necessary when control flow is directed to the error + /// handler due to an error. + PopErrorHandler, + /// Check if an external command failed. Boolean value into `dst`. `src` is preserved, but it + /// does require waiting for the command to exit. + CheckExternalFailed { dst: RegId, src: RegId }, + /// Return early from the block, raising a `ShellError::Return` instead. + /// + /// Collecting the value is unavoidable. + ReturnEarly { src: RegId }, + /// Return from the block with the value in the register + Return { src: RegId }, +} + +impl Instruction { + /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed + /// listing of the instruction. + pub fn display<'a>( + &'a self, + engine_state: &'a EngineState, + data: &'a [u8], + ) -> FmtInstruction<'a> { + FmtInstruction { + engine_state, + instruction: self, + data, + } + } + + /// Returns the branch target index of the instruction if this is a branching instruction. + pub fn branch_target(&self) -> Option { + match self { + Instruction::Jump { index } => Some(*index), + Instruction::BranchIf { cond: _, index } => Some(*index), + Instruction::BranchIfEmpty { src: _, index } => Some(*index), + Instruction::Match { + pattern: _, + src: _, + index, + } => Some(*index), + + Instruction::Iterate { + dst: _, + stream: _, + end_index, + } => Some(*end_index), + Instruction::OnError { index } => Some(*index), + Instruction::OnErrorInto { index, dst: _ } => Some(*index), + _ => None, + } + } + + /// Sets the branch target of the instruction if this is a branching instruction. + /// + /// Returns `Err(target_index)` if it isn't a branching instruction. 
+ pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> { + match self { + Instruction::Jump { index } => *index = target_index, + Instruction::BranchIf { cond: _, index } => *index = target_index, + Instruction::BranchIfEmpty { src: _, index } => *index = target_index, + Instruction::Match { + pattern: _, + src: _, + index, + } => *index = target_index, + + Instruction::Iterate { + dst: _, + stream: _, + end_index, + } => *end_index = target_index, + Instruction::OnError { index } => *index = target_index, + Instruction::OnErrorInto { index, dst: _ } => *index = target_index, + _ => return Err(target_index), + } + Ok(()) + } +} + +// This is to document/enforce the size of `Instruction` in bytes. +// We should try to avoid increasing the size of `Instruction`, +// and PRs that do so will have to change the number below so that it's noted in review. +const _: () = assert!(std::mem::size_of::() <= 24); + +/// A literal value that can be embedded in an instruction. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Literal { + Bool(bool), + Int(i64), + Float(f64), + Filesize(i64), + Duration(i64), + Binary(DataSlice), + Block(BlockId), + Closure(BlockId), + RowCondition(BlockId), + Range { + start: RegId, + step: RegId, + end: RegId, + inclusion: RangeInclusion, + }, + List { + capacity: usize, + }, + Record { + capacity: usize, + }, + Filepath { + val: DataSlice, + no_expand: bool, + }, + Directory { + val: DataSlice, + no_expand: bool, + }, + GlobPattern { + val: DataSlice, + no_expand: bool, + }, + String(DataSlice), + RawString(DataSlice), + CellPath(Box), + Date(Box>), + Nothing, +} + +/// A redirection mode for the next call. See [`OutDest`](crate::OutDest). +/// +/// This is generated by: +/// +/// 1. Explicit redirection in a [`PipelineElement`](crate::ast::PipelineElement), or +/// 2. The [`pipe_redirection()`](crate::engine::Command::pipe_redirection) of the command being +/// piped into. 
+/// +/// Not setting it uses the default, determined by [`Stack`](crate::engine::Stack). +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub enum RedirectMode { + Pipe, + Capture, + Null, + Inherit, + /// Use the given numbered file. + File { + file_num: u32, + }, + /// Use the redirection mode requested by the caller, for a pre-return call. + Caller, +} + +/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized +mod serde_arc_u8_array { + use serde::{Deserialize, Serialize}; + use std::sync::Arc; + + pub fn serialize(data: &Arc<[u8]>, ser: S) -> Result + where + S: serde::Serializer, + { + data.as_ref().serialize(ser) + } + + pub fn deserialize<'de, D>(de: D) -> Result, D::Error> + where + D: serde::Deserializer<'de>, + { + let data: Vec = Deserialize::deserialize(de)?; + Ok(data.into()) + } +} diff --git a/crates/nu-protocol/src/lib.rs b/crates/nu-protocol/src/lib.rs index 9c176953d5..cae5d3fd0a 100644 --- a/crates/nu-protocol/src/lib.rs +++ b/crates/nu-protocol/src/lib.rs @@ -9,6 +9,7 @@ pub mod eval_base; pub mod eval_const; mod example; mod id; +pub mod ir; mod lev_distance; mod module; pub mod parser_path; diff --git a/crates/nu-protocol/src/pipeline/byte_stream.rs b/crates/nu-protocol/src/pipeline/byte_stream.rs index 6226f1d8db..cd62b70801 100644 --- a/crates/nu-protocol/src/pipeline/byte_stream.rs +++ b/crates/nu-protocol/src/pipeline/byte_stream.rs @@ -352,6 +352,12 @@ impl ByteStream { self.span } + /// Changes the [`Span`] associated with the [`ByteStream`]. + pub fn with_span(mut self, span: Span) -> Self { + self.span = span; + self + } + /// Returns the [`ByteStreamType`] associated with the [`ByteStream`]. 
pub fn type_(&self) -> ByteStreamType { self.type_ diff --git a/crates/nu-protocol/src/pipeline/list_stream.rs b/crates/nu-protocol/src/pipeline/list_stream.rs index 997cc3f77b..104bab6bcc 100644 --- a/crates/nu-protocol/src/pipeline/list_stream.rs +++ b/crates/nu-protocol/src/pipeline/list_stream.rs @@ -31,11 +31,22 @@ impl ListStream { self.span } + /// Changes the [`Span`] associated with this [`ListStream`]. + pub fn with_span(mut self, span: Span) -> Self { + self.span = span; + self + } + /// Convert a [`ListStream`] into its inner [`Value`] `Iterator`. pub fn into_inner(self) -> ValueIterator { self.stream } + /// Take a single value from the inner `Iterator`, modifying the stream. + pub fn next_value(&mut self) -> Option { + self.stream.next() + } + /// Converts each value in a [`ListStream`] into a string and then joins the strings together /// using the given separator. pub fn into_string(self, separator: &str, config: &Config) -> String { diff --git a/crates/nu-protocol/src/pipeline/pipeline_data.rs b/crates/nu-protocol/src/pipeline/pipeline_data.rs index a546e90191..a89337a6c2 100644 --- a/crates/nu-protocol/src/pipeline/pipeline_data.rs +++ b/crates/nu-protocol/src/pipeline/pipeline_data.rs @@ -96,6 +96,24 @@ impl PipelineData { } } + /// Change the span of the [`PipelineData`]. + /// + /// Returns `Value(Nothing)` with the given span if it was [`PipelineData::Empty`]. + pub fn with_span(self, span: Span) -> Self { + match self { + PipelineData::Empty => PipelineData::Value(Value::nothing(span), None), + PipelineData::Value(value, metadata) => { + PipelineData::Value(value.with_span(span), metadata) + } + PipelineData::ListStream(stream, metadata) => { + PipelineData::ListStream(stream.with_span(span), metadata) + } + PipelineData::ByteStream(stream, metadata) => { + PipelineData::ByteStream(stream.with_span(span), metadata) + } + } + } + /// Get a type that is representative of the `PipelineData`. 
/// /// The type returned here makes no effort to collect a stream, so it may be a different type @@ -129,7 +147,8 @@ impl PipelineData { /// without consuming input and without writing anything. /// /// For the other [`OutDest`]s, the given `PipelineData` will be completely consumed - /// and `PipelineData::Empty` will be returned. + /// and `PipelineData::Empty` will be returned, unless the data is from an external stream, + /// in which case an external stream containing only that exit code will be returned. pub fn write_to_out_dests( self, engine_state: &EngineState, @@ -137,7 +156,11 @@ impl PipelineData { ) -> Result { match (self, stack.stdout()) { (PipelineData::ByteStream(stream, ..), stdout) => { - stream.write_to_out_dests(stdout, stack.stderr())?; + if let Some(exit_status) = stream.write_to_out_dests(stdout, stack.stderr())? { + return Ok(PipelineData::new_external_stream_with_only_exit_code( + exit_status.code(), + )); + } } (data, OutDest::Pipe | OutDest::Capture) => return Ok(data), (PipelineData::Empty, ..) 
=> {} @@ -570,7 +593,7 @@ impl PipelineData { self.write_all_and_flush(engine_state, no_newline, to_stderr) } else { let call = Call::new(Span::new(0, 0)); - let table = command.run(engine_state, stack, &call, self)?; + let table = command.run(engine_state, stack, &(&call).into(), self)?; table.write_all_and_flush(engine_state, no_newline, to_stderr) } } else { diff --git a/crates/nu-protocol/src/signature.rs b/crates/nu-protocol/src/signature.rs index 70e94b35f1..5928ce0c0c 100644 --- a/crates/nu-protocol/src/signature.rs +++ b/crates/nu-protocol/src/signature.rs @@ -1,6 +1,5 @@ use crate::{ - ast::Call, - engine::{Command, CommandType, EngineState, Stack}, + engine::{Call, Command, CommandType, EngineState, Stack}, BlockId, PipelineData, ShellError, SyntaxShape, Type, Value, VarId, }; use serde::{Deserialize, Serialize}; diff --git a/crates/nu-protocol/src/span.rs b/crates/nu-protocol/src/span.rs index 0d280eaa9d..f5bcebc543 100644 --- a/crates/nu-protocol/src/span.rs +++ b/crates/nu-protocol/src/span.rs @@ -53,6 +53,22 @@ impl Spanned { } } +impl Spanned> { + /// Move the `Result` to the outside, resulting in a spanned `Ok` or unspanned `Err`. + pub fn transpose(self) -> Result, E> { + match self { + Spanned { + item: Ok(item), + span, + } => Ok(Spanned { item, span }), + Spanned { + item: Err(err), + span: _, + } => Err(err), + } + } +} + /// Helper trait to create [`Spanned`] more ergonomically. pub trait IntoSpanned: Sized { /// Wrap items together with a span into [`Spanned`]. 
diff --git a/crates/nu-test-support/src/macros.rs b/crates/nu-test-support/src/macros.rs index e83b4354da..958a2453f5 100644 --- a/crates/nu-test-support/src/macros.rs +++ b/crates/nu-test-support/src/macros.rs @@ -247,6 +247,7 @@ pub struct NuOpts { pub locale: Option, pub envs: Option>, pub collapse_output: Option, + pub use_ir: Option, } pub fn nu_run_test(opts: NuOpts, commands: impl AsRef, with_std: bool) -> Outcome { @@ -296,6 +297,15 @@ pub fn nu_run_test(opts: NuOpts, commands: impl AsRef, with_std: bool) -> O .stdout(Stdio::piped()) .stderr(Stdio::piped()); + // Explicitly set NU_USE_IR + if let Some(use_ir) = opts.use_ir { + if use_ir { + command.env("NU_USE_IR", "1"); + } else { + command.env_remove("NU_USE_IR"); + } + } + // Uncomment to debug the command being run: // println!("=== command\n{command:?}\n"); @@ -373,6 +383,7 @@ where if !executable_path.exists() { executable_path = crate::fs::installed_nu_path(); } + let process = match setup_command(&executable_path, &target_cwd) .envs(envs) .arg("--commands") diff --git a/src/run.rs b/src/run.rs index 6bb02451b9..10a5043b25 100644 --- a/src/run.rs +++ b/src/run.rs @@ -26,6 +26,10 @@ pub(crate) fn run_commands( let mut stack = Stack::new(); let start_time = std::time::Instant::now(); + if stack.has_env_var(engine_state, "NU_USE_IR") { + stack.use_ir = true; + } + // if the --no-config-file(-n) option is NOT passed, load the plugin file, // load the default env file or custom (depending on parsed_nu_cli_args.env_file), // and maybe a custom config file (depending on parsed_nu_cli_args.config_file) @@ -109,6 +113,10 @@ pub(crate) fn run_file( trace!("run_file"); let mut stack = Stack::new(); + if stack.has_env_var(engine_state, "NU_USE_IR") { + stack.use_ir = true; + } + // if the --no-config-file(-n) option is NOT passed, load the plugin file, // load the default env file or custom (depending on parsed_nu_cli_args.env_file), // and maybe a custom config file (depending on parsed_nu_cli_args.config_file) 
@@ -184,6 +192,10 @@ pub(crate) fn run_repl( let mut stack = Stack::new(); let start_time = std::time::Instant::now(); + if stack.has_env_var(engine_state, "NU_USE_IR") { + stack.use_ir = true; + } + if parsed_nu_cli_args.no_config_file.is_none() { setup_config( engine_state, diff --git a/tests/eval/mod.rs b/tests/eval/mod.rs index f3af92376a..e9afd401e9 100644 --- a/tests/eval/mod.rs +++ b/tests/eval/mod.rs @@ -1,7 +1,8 @@ -use nu_test_support::nu; +use nu_test_support::{nu, playground::Playground}; +use regex::Regex; #[test] -fn source_file_relative_to_file() { +fn record_with_redefined_key() { let actual = nu!("{x: 1, x: 2}"); assert!(actual.err.contains("redefined")); @@ -16,3 +17,455 @@ fn run_file_parse_error() { assert!(actual.err.contains("unknown type")); } + +enum ExpectedOut<'a> { + /// Equals a string exactly + Eq(&'a str), + /// Matches a regex + Matches(&'a str), + /// Produces an error (match regex) + Error(&'a str), + /// Drops a file that contains these contents + FileEq(&'a str, &'a str), +} +use self::ExpectedOut::*; + +fn test_eval(source: &str, expected_out: ExpectedOut) { + Playground::setup("test_eval_ast", |ast_dirs, _playground| { + Playground::setup("test_eval_ir", |ir_dirs, _playground| { + let actual_ast = nu!( + cwd: ast_dirs.test(), + use_ir: false, + source, + ); + let actual_ir = nu!( + cwd: ir_dirs.test(), + use_ir: true, + source, + ); + + match expected_out { + Eq(eq) => { + assert_eq!(actual_ast.out, eq); + assert_eq!(actual_ir.out, eq); + assert!(actual_ast.status.success()); + assert!(actual_ir.status.success()); + } + Matches(regex) => { + let compiled_regex = Regex::new(regex).expect("regex failed to compile"); + assert!( + compiled_regex.is_match(&actual_ast.out), + "AST eval out does not match: {}\n{}", + regex, + actual_ast.out + ); + assert!( + compiled_regex.is_match(&actual_ir.out), + "IR eval out does not match: {}\n{}", + regex, + actual_ir.out, + ); + assert!(actual_ast.status.success()); + 
assert!(actual_ir.status.success()); + } + Error(regex) => { + let compiled_regex = Regex::new(regex).expect("regex failed to compile"); + assert!( + compiled_regex.is_match(&actual_ast.err), + "AST eval err does not match: {}", + regex + ); + assert!( + compiled_regex.is_match(&actual_ir.err), + "IR eval err does not match: {}", + regex + ); + assert!(!actual_ast.status.success()); + assert!(!actual_ir.status.success()); + } + FileEq(path, contents) => { + let ast_contents = std::fs::read_to_string(ast_dirs.test().join(path)) + .expect("failed to read AST file"); + let ir_contents = std::fs::read_to_string(ir_dirs.test().join(path)) + .expect("failed to read IR file"); + assert_eq!(ast_contents.trim(), contents); + assert_eq!(ir_contents.trim(), contents); + assert!(actual_ast.status.success()); + assert!(actual_ir.status.success()); + } + } + assert_eq!(actual_ast.out, actual_ir.out); + }) + }); +} + +#[test] +fn literal_bool() { + test_eval("true", Eq("true")) +} + +#[test] +fn literal_int() { + test_eval("1", Eq("1")) +} + +#[test] +fn literal_float() { + test_eval("1.5", Eq("1.5")) +} + +#[test] +fn literal_filesize() { + test_eval("30MiB", Eq("30.0 MiB")) +} + +#[test] +fn literal_duration() { + test_eval("30ms", Eq("30ms")) +} + +#[test] +fn literal_binary() { + test_eval("0x[1f 2f f0]", Matches("Length.*1f.*2f.*f0")) +} + +#[test] +fn literal_closure() { + test_eval("{||}", Matches(" hello.txt", + FileEq("hello.txt", "hello"), + ) +} + +#[test] +fn let_variable() { + test_eval("let foo = 'test'; print $foo", Eq("test")) +} + +#[test] +fn let_variable_mutate_error() { + test_eval( + "let foo = 'test'; $foo = 'bar'; print $foo", + Error("immutable"), + ) +} + +#[test] +fn constant() { + test_eval("const foo = 1 + 2; print $foo", Eq("3")) +} + +#[test] +fn constant_assign_error() { + test_eval( + "const foo = 1 + 2; $foo = 4; print $foo", + Error("immutable"), + ) +} + +#[test] +fn mut_variable() { + test_eval("mut foo = 'test'; $foo = 'bar'; print $foo", 
Eq("bar")) +} + +#[test] +fn mut_variable_append_assign() { + test_eval( + "mut foo = 'test'; $foo ++= 'bar'; print $foo", + Eq("testbar"), + ) +} + +#[test] +fn bind_in_variable_to_input() { + test_eval("3 | (4 + $in)", Eq("7")) +} + +#[test] +fn if_true() { + test_eval("if true { 'foo' }", Eq("foo")) +} + +#[test] +fn if_false() { + test_eval("if false { 'foo' } | describe", Eq("nothing")) +} + +#[test] +fn if_else_true() { + test_eval("if 5 > 3 { 'foo' } else { 'bar' }", Eq("foo")) +} + +#[test] +fn if_else_false() { + test_eval("if 5 < 3 { 'foo' } else { 'bar' }", Eq("bar")) +} + +#[test] +fn match_empty_fallthrough() { + test_eval("match 42 { }; 'pass'", Eq("pass")) +} + +#[test] +fn match_value() { + test_eval("match 1 { 1 => 'pass', 2 => 'fail' }", Eq("pass")) +} + +#[test] +fn match_value_default() { + test_eval( + "match 3 { 1 => 'fail1', 2 => 'fail2', _ => 'pass' }", + Eq("pass"), + ) +} + +#[test] +fn match_value_fallthrough() { + test_eval("match 3 { 1 => 'fail1', 2 => 'fail2' }", Eq("")) +} + +#[test] +fn match_variable() { + test_eval( + "match 'pass' { $s => { print $s }, _ => { print 'fail' } }", + Eq("pass"), + ) +} + +#[test] +fn match_variable_in_list() { + test_eval("match [fail pass] { [$f, $p] => { print $p } }", Eq("pass")) +} + +#[test] +fn match_passthrough_input() { + test_eval( + "'yes' | match [pass fail] { [$p, ..] 
=> (collect { |y| $y ++ $p }) }", + Eq("yespass"), + ) +} + +#[test] +fn while_mutate_var() { + test_eval("mut x = 2; while $x > 0 { print $x; $x -= 1 }", Eq("21")) +} + +#[test] +fn for_list() { + test_eval("for v in [1 2 3] { print ($v * 2) }", Eq(r"246")) +} + +#[test] +fn for_seq() { + test_eval("for v in (seq 1 4) { print ($v * 2) }", Eq("2468")) +} + +#[test] +fn early_return() { + test_eval("do { return 'foo'; 'bar' }", Eq("foo")) +} + +#[test] +fn early_return_from_if() { + test_eval("do { if true { return 'pass' }; 'fail' }", Eq("pass")) +} + +#[test] +fn early_return_from_loop() { + test_eval("do { loop { return 'pass' } }", Eq("pass")) +} + +#[test] +fn early_return_from_while() { + test_eval( + "do { let x = true; while $x { return 'pass' } }", + Eq("pass"), + ) +} + +#[test] +fn early_return_from_for() { + test_eval("do { for x in [pass fail] { return $x } }", Eq("pass")) +} + +#[test] +fn try_no_catch() { + test_eval("try { error make { msg: foo } }; 'pass'", Eq("pass")) +} + +#[test] +fn try_catch_no_var() { + test_eval( + "try { error make { msg: foo } } catch { 'pass' }", + Eq("pass"), + ) +} + +#[test] +fn try_catch_var() { + test_eval( + "try { error make { msg: foo } } catch { |err| $err.msg }", + Eq("foo"), + ) +} + +#[test] +fn try_catch_with_non_literal_closure_no_var() { + test_eval( + r#" + let error_handler = { || "pass" } + try { error make { msg: foobar } } catch $error_handler + "#, + Eq("pass"), + ) +} + +#[test] +fn try_catch_with_non_literal_closure() { + test_eval( + r#" + let error_handler = { |err| $err.msg } + try { error make { msg: foobar } } catch $error_handler + "#, + Eq("foobar"), + ) +} + +#[test] +fn row_condition() { + test_eval( + "[[a b]; [1 2] [3 4]] | where a < 3 | to nuon", + Eq("[[a, b]; [1, 2]]"), + ) +} + +#[test] +fn custom_command() { + test_eval( + r#" + def cmd [a: int, b: string = 'fail', ...c: string, --x: int] { $"($a)($b)($c)($x)" } + cmd 42 pass foo --x 30 + "#, + Eq("42pass[foo]30"), + ) +} From 
1a5bf2447a9a46f83b8cc86c5cac0ea1bc6f1122 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Wed, 10 Jul 2024 17:34:50 -0700 Subject: [PATCH 5/8] Use Arc for environment variables on the stack (#13333) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This is another easy performance lift that just changes `env_vars` and `env_hidden` on `Stack` to use `Arc`. I noticed that these were being cloned on essentially every closure invocation during captures gathering, so we're paying the cost for all of that even when we don't change anything. On top of that, for `env_vars`, there's actually an entirely fresh `HashMap` created for each child scope, so it's highly unlikely that we'll modify the parent ones. Uses `Arc::make_mut` instead to take care of things when we need to mutate something, and most of the time nothing has to be cloned at all. # Benchmarks The benefits are greater the more calls there are to env-cloning functions like `captures_to_stack()`. Calling custom commands in a loop is basically best case for a performance improvement. Plain `each` with a literal block isn't so badly affected because the stack is set up once. 
## random_bytes.nu ```nushell use std bench do { const SCRIPT = ../nu_scripts/benchmarks/random-bytes.nu let before_change = bench { nu $SCRIPT } let after_change = bench { target/release/nu $SCRIPT } { before: ($before_change | reject times), after: ($after_change | reject times) } } ``` ``` ╭────────┬──────────────────────────────╮ │ │ ╭──────┬───────────────────╮ │ │ before │ │ mean │ 603ms 759µs 727ns │ │ │ │ │ min │ 593ms 298µs 167ns │ │ │ │ │ max │ 648ms 612µs 291ns │ │ │ │ │ std │ 9ms 335µs 251ns │ │ │ │ ╰──────┴───────────────────╯ │ │ │ ╭──────┬───────────────────╮ │ │ after │ │ mean │ 518ms 400µs 557ns │ │ │ │ │ min │ 507ms 762µs 583ns │ │ │ │ │ max │ 566ms 695µs 166ns │ │ │ │ │ std │ 9ms 554µs 767ns │ │ │ │ ╰──────┴───────────────────╯ │ ╰────────┴──────────────────────────────╯ ``` ## gradient_benchmark_no_check.nu ```nushell use std bench do { const SCRIPT = ../nu_scripts/benchmarks/gradient_benchmark_no_check.nu let before_change = bench { nu $SCRIPT } let after_change = bench { target/release/nu $SCRIPT } { before: ($before_change | reject times), after: ($after_change | reject times) } } ``` ``` ╭────────┬──────────────────────────────╮ │ │ ╭──────┬───────────────────╮ │ │ before │ │ mean │ 146ms 543µs 380ns │ │ │ │ │ min │ 142ms 416µs 166ns │ │ │ │ │ max │ 189ms 595µs │ │ │ │ │ std │ 7ms 140µs 342ns │ │ │ │ ╰──────┴───────────────────╯ │ │ │ ╭──────┬───────────────────╮ │ │ after │ │ mean │ 134ms 211µs 678ns │ │ │ │ │ min │ 132ms 433µs 125ns │ │ │ │ │ max │ 135ms 722µs 583ns │ │ │ │ │ std │ 793µs 134ns │ │ │ │ ╰──────┴───────────────────╯ │ ╰────────┴──────────────────────────────╯ ``` # User-Facing Changes Better performance, particularly for custom commands, especially if there are a lot of environment variables. Nothing else. # Tests + Formatting All passing. 
--- crates/nu-engine/src/closure_eval.rs | 4 +-- crates/nu-protocol/src/engine/engine_state.rs | 2 +- crates/nu-protocol/src/engine/stack.rs | 30 ++++++++++--------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/crates/nu-engine/src/closure_eval.rs b/crates/nu-engine/src/closure_eval.rs index f4bc40658b..b271d90cbe 100644 --- a/crates/nu-engine/src/closure_eval.rs +++ b/crates/nu-engine/src/closure_eval.rs @@ -62,8 +62,8 @@ pub struct ClosureEval { stack: Stack, block: Arc, arg_index: usize, - env_vars: Vec, - env_hidden: HashMap>, + env_vars: Vec>, + env_hidden: Arc>>, eval: EvalBlockWithEarlyReturnFn, } diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index d45fbafb88..f48fde663b 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -304,7 +304,7 @@ impl EngineState { let mut config_updated = false; for mut scope in stack.env_vars.drain(..) { - for (overlay_name, mut env) in scope.drain() { + for (overlay_name, mut env) in Arc::make_mut(&mut scope).drain() { if let Some(env_vars) = Arc::make_mut(&mut self.env_vars).get_mut(&overlay_name) { // Updating existing overlay for (k, v) in env.drain() { diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index b289c1ae8b..9315744dfb 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -36,9 +36,9 @@ pub struct Stack { /// Variables pub vars: Vec<(VarId, Value)>, /// Environment variables arranged as a stack to be able to recover values from parent scopes - pub env_vars: Vec, + pub env_vars: Vec>, /// Tells which environment variables from engine state are hidden, per overlay. 
- pub env_hidden: HashMap>, + pub env_hidden: Arc>>, /// List of active overlays pub active_overlays: Vec, /// Argument stack for IR evaluation @@ -72,7 +72,7 @@ impl Stack { Self { vars: Vec::new(), env_vars: Vec::new(), - env_hidden: HashMap::new(), + env_hidden: Arc::new(HashMap::new()), active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()], arguments: ArgumentStack::new(), error_handlers: ErrorHandlerStack::new(), @@ -131,8 +131,8 @@ impl Stack { pub fn with_env( &mut self, - env_vars: &[EnvVars], - env_hidden: &HashMap>, + env_vars: &[Arc], + env_hidden: &Arc>>, ) { // Do not clone the environment if it hasn't changed if self.env_vars.iter().any(|scope| !scope.is_empty()) { @@ -219,23 +219,24 @@ impl Stack { pub fn add_env_var(&mut self, var: String, value: Value) { if let Some(last_overlay) = self.active_overlays.last() { - if let Some(env_hidden) = self.env_hidden.get_mut(last_overlay) { + if let Some(env_hidden) = Arc::make_mut(&mut self.env_hidden).get_mut(last_overlay) { // if the env var was hidden, let's activate it again env_hidden.remove(&var); } if let Some(scope) = self.env_vars.last_mut() { + let scope = Arc::make_mut(scope); if let Some(env_vars) = scope.get_mut(last_overlay) { env_vars.insert(var, value); } else { scope.insert(last_overlay.into(), [(var, value)].into_iter().collect()); } } else { - self.env_vars.push( + self.env_vars.push(Arc::new( [(last_overlay.into(), [(var, value)].into_iter().collect())] .into_iter() .collect(), - ); + )); } } else { // TODO: Remove panic @@ -257,9 +258,8 @@ impl Stack { } pub fn captures_to_stack_preserve_out_dest(&self, captures: Vec<(VarId, Value)>) -> Stack { - // FIXME: this is probably slow let mut env_vars = self.env_vars.clone(); - env_vars.push(HashMap::new()); + env_vars.push(Arc::new(HashMap::new())); Stack { vars: captures, @@ -292,7 +292,7 @@ impl Stack { } let mut env_vars = self.env_vars.clone(); - env_vars.push(HashMap::new()); + env_vars.push(Arc::new(HashMap::new())); Stack { vars, @@ 
-462,6 +462,7 @@ impl Stack { pub fn remove_env_var(&mut self, engine_state: &EngineState, name: &str) -> bool { for scope in self.env_vars.iter_mut().rev() { + let scope = Arc::make_mut(scope); for active_overlay in self.active_overlays.iter().rev() { if let Some(env_vars) = scope.get_mut(active_overlay) { if env_vars.remove(name).is_some() { @@ -474,10 +475,11 @@ impl Stack { for active_overlay in self.active_overlays.iter().rev() { if let Some(env_vars) = engine_state.env_vars.get(active_overlay) { if env_vars.get(name).is_some() { - if let Some(env_hidden) = self.env_hidden.get_mut(active_overlay) { - env_hidden.insert(name.into()); + let env_hidden = Arc::make_mut(&mut self.env_hidden); + if let Some(env_hidden_in_overlay) = env_hidden.get_mut(active_overlay) { + env_hidden_in_overlay.insert(name.into()); } else { - self.env_hidden + env_hidden .insert(active_overlay.into(), [name.into()].into_iter().collect()); } From ac561b1b0e65e9fd6ee6e682a08abb919983b0da Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Wed, 10 Jul 2024 20:19:06 -0500 Subject: [PATCH 6/8] quick fix up for ir pr as_refs (#13340) # Description Was having an issue compiling main after the IR pr. Talked to devyn and he led me to change a couple things real quick and we're compiling once again. 
--- crates/nu-engine/src/eval_ir.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/nu-engine/src/eval_ir.rs b/crates/nu-engine/src/eval_ir.rs index a505c9be34..e2ec7ccdac 100644 --- a/crates/nu-engine/src/eval_ir.rs +++ b/crates/nu-engine/src/eval_ir.rs @@ -1275,7 +1275,7 @@ fn get_env_var_case_insensitive<'a>(ctx: &'a mut EvalContext<'_>, key: &str) -> .env_vars .iter() .rev() - .chain(std::iter::once(ctx.engine_state.env_vars.as_ref())) + .chain(std::iter::once(&ctx.engine_state.env_vars)) .flat_map(|overlays| { // Read overlays in order ctx.stack @@ -1303,7 +1303,7 @@ fn get_env_var_name_case_insensitive<'a>(ctx: &mut EvalContext<'_>, key: &'a str .env_vars .iter() .rev() - .chain(std::iter::once(ctx.engine_state.env_vars.as_ref())) + .chain(std::iter::once(&ctx.engine_state.env_vars)) .flat_map(|overlays| { // Read overlays in order ctx.stack From f87cf895c2062cb5d71e97cd3221d101c825c8af Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Wed, 10 Jul 2024 19:13:35 -0700 Subject: [PATCH 7/8] Set the capacity of the Vec used in `gather_captures()` to the number of captures expected (#13339) # Description Just more efficient allocation during `Stack::gather_captures()` so that we don't have to grow the `Vec` needlessly. # User-Facing Changes Slightly better performance. 
--- crates/nu-protocol/src/engine/stack.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index 9315744dfb..e963227ca8 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -277,7 +277,7 @@ impl Stack { } pub fn gather_captures(&self, engine_state: &EngineState, captures: &[VarId]) -> Stack { - let mut vars = vec![]; + let mut vars = Vec::with_capacity(captures.len()); let fake_span = Span::new(0, 0); From 801cfae279cb384ecef137caab0b77d899a26b44 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Wed, 10 Jul 2024 19:14:05 -0700 Subject: [PATCH 8/8] Avoid clone in `Signature::get_positional()` (#13338) # Description `Signature::get_positional()` was returning an owned `PositionalArg`, which contains a bunch of strings. `ClosureEval` uses this in `try_add_arg`, making all of that unnecessary cloning a little bit hot. # User-Facing Changes Slightly better performance --- crates/nu-protocol/src/signature.rs | 7 +++---- crates/nu-protocol/tests/test_signature.rs | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/nu-protocol/src/signature.rs b/crates/nu-protocol/src/signature.rs index 5928ce0c0c..3241b0df22 100644 --- a/crates/nu-protocol/src/signature.rs +++ b/crates/nu-protocol/src/signature.rs @@ -485,15 +485,14 @@ impl Signature { (name, s) } - pub fn get_positional(&self, position: usize) -> Option { + pub fn get_positional(&self, position: usize) -> Option<&PositionalArg> { if position < self.required_positional.len() { - self.required_positional.get(position).cloned() + self.required_positional.get(position) } else if position < (self.required_positional.len() + self.optional_positional.len()) { self.optional_positional .get(position - self.required_positional.len()) - .cloned() } else { - self.rest_positional.clone() + self.rest_positional.as_ref() } } diff --git 
a/crates/nu-protocol/tests/test_signature.rs b/crates/nu-protocol/tests/test_signature.rs index e22029bab9..8faf772c38 100644 --- a/crates/nu-protocol/tests/test_signature.rs +++ b/crates/nu-protocol/tests/test_signature.rs @@ -39,7 +39,7 @@ fn test_signature_chained() { assert_eq!( signature.get_positional(0), - Some(PositionalArg { + Some(&PositionalArg { name: "required".to_string(), desc: "required description".to_string(), shape: SyntaxShape::String, @@ -49,7 +49,7 @@ fn test_signature_chained() { ); assert_eq!( signature.get_positional(1), - Some(PositionalArg { + Some(&PositionalArg { name: "optional".to_string(), desc: "optional description".to_string(), shape: SyntaxShape::String, @@ -59,7 +59,7 @@ fn test_signature_chained() { ); assert_eq!( signature.get_positional(2), - Some(PositionalArg { + Some(&PositionalArg { name: "rest".to_string(), desc: "rest description".to_string(), shape: SyntaxShape::String,