From fceaefcfaf5350f97248cb4ef77eb534d8569ce2 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Mon, 8 Jul 2024 18:50:56 -0700 Subject: [PATCH] fix behavior of loops with external stream results (should terminate) --- crates/nu-engine/src/compile/builder.rs | 81 ++++++++++ crates/nu-engine/src/compile/call.rs | 6 + crates/nu-engine/src/compile/keyword.rs | 146 ++++++++++++++++-- .../nu-protocol/src/errors/compile_error.rs | 10 ++ 4 files changed, 231 insertions(+), 12 deletions(-) diff --git a/crates/nu-engine/src/compile/builder.rs b/crates/nu-engine/src/compile/builder.rs index 94a8b79705..c930daa3b1 100644 --- a/crates/nu-engine/src/compile/builder.rs +++ b/crates/nu-engine/src/compile/builder.rs @@ -12,6 +12,7 @@ pub(crate) struct BlockBuilder { pub(crate) ast: Vec>, pub(crate) register_allocation_state: Vec, pub(crate) file_count: u32, + pub(crate) loop_stack: Vec, } impl BlockBuilder { @@ -24,6 +25,7 @@ impl BlockBuilder { ast: vec![], register_allocation_state: vec![true], file_count: 0, + loop_stack: vec![], } } @@ -420,6 +422,69 @@ impl BlockBuilder { Ok(next) } + /// Push a new loop state onto the builder. + pub(crate) fn begin_loop(&mut self) { + self.loop_stack.push(LoopState::new()); + } + + /// True if we are currently in a loop. + pub(crate) fn is_in_loop(&self) -> bool { + !self.loop_stack.is_empty() + } + + /// Add a loop breaking jump instruction. + pub(crate) fn push_break(&mut self, span: Span) -> Result { + let index = self.jump_placeholder(span)?; + self.loop_stack + .last_mut() + .ok_or_else(|| CompileError::NotInALoop { + msg: "`break` called from outside of a loop".into(), + span: Some(span), + })? + .break_branches + .push(index); + Ok(index) + } + + /// Add a loop continuing jump instruction. + pub(crate) fn push_continue(&mut self, span: Span) -> Result { + let index = self.jump_placeholder(span)?; + self.loop_stack + .last_mut() + .ok_or_else(|| CompileError::NotInALoop { + msg: "`continue` called from outside of a loop".into(), + span: Some(span), + })? + .continue_branches + .push(index); + Ok(index) + } + + /// Pop the loop state and set any `break` or `continue` instructions to their appropriate + /// target instruction indexes. + pub(crate) fn end_loop( + &mut self, + break_target_index: usize, + continue_target_index: usize, + ) -> Result<(), CompileError> { + let loop_state = self + .loop_stack + .pop() + .ok_or_else(|| CompileError::NotInALoop { + msg: "end_loop() called outside of a loop".into(), + span: None, + })?; + + for break_index in loop_state.break_branches { + self.set_branch_target(break_index, break_target_index)?; + } + for continue_index in loop_state.continue_branches { + self.set_branch_target(continue_index, continue_target_index)?; + } + + Ok(()) + } + /// Consume the builder and produce the final [`IrBlock`]. pub(crate) fn finish(self) -> IrBlock { IrBlock { @@ -436,3 +501,19 @@ impl BlockBuilder { } } } + +/// Keeps track of `break` and `continue` branches that need to be set up after a loop is compiled. +#[derive(Debug)] +pub(crate) struct LoopState { + break_branches: Vec, + continue_branches: Vec, +} + +impl LoopState { + pub(crate) const fn new() -> Self { + LoopState { + break_branches: vec![], + continue_branches: vec![], + } + } +} diff --git a/crates/nu-engine/src/compile/call.rs b/crates/nu-engine/src/compile/call.rs index acd9be7d87..2ce623b15f 100644 --- a/crates/nu-engine/src/compile/call.rs +++ b/crates/nu-engine/src/compile/call.rs @@ -62,6 +62,12 @@ pub(crate) fn compile_call( "for" => { return compile_for(working_set, builder, call, redirect_modes, io_reg); } + "break" => { + return compile_break(working_set, builder, call, redirect_modes, io_reg); + } + "continue" => { + return compile_continue(working_set, builder, call, redirect_modes, io_reg); + } "return" => { return compile_return(working_set, builder, call, redirect_modes, io_reg); } diff --git a/crates/nu-engine/src/compile/keyword.rs b/crates/nu-engine/src/compile/keyword.rs index afb2cfbd72..58ce1fcd04 100644 --- a/crates/nu-engine/src/compile/keyword.rs +++ b/crates/nu-engine/src/compile/keyword.rs @@ -539,9 +539,13 @@ pub(crate) fn compile_loop( ) -> Result<(), CompileError> { // Pseudocode: // - // LOOP: drain %io_reg - // ...... + // drop %io_reg + // LOOP: ...... + // check-external-failed %failed_reg, %io_reg + // drain %io_reg + // branch-if %failed_reg, END // jump %LOOP + // END: drop %io_reg let invalid = || CompileError::InvalidKeywordCall { keyword: "loop".into(), span: call.head, @@ -551,7 +555,10 @@ pub(crate) fn compile_loop( let block_id = block_arg.as_block().ok_or_else(invalid)?; let block = working_set.get_block(block_id); - let loop_index = builder.drain(io_reg, call.head)?; + builder.begin_loop(); + builder.load_empty(io_reg)?; + + let loop_index = builder.next_instruction_index(); compile_block( working_set, @@ -562,8 +569,27 @@ pub(crate) fn compile_loop( io_reg, )?; + // Check for failed externals, and drain the output + let failed_reg = builder.next_register()?; + builder.push( + Instruction::CheckExternalFailed { + dst: failed_reg, + src: io_reg, + } + .into_spanned(call.head), + )?; + builder.drain(io_reg, call.head)?; + let failed_index = builder.branch_if_placeholder(failed_reg, call.head)?; + builder.jump(loop_index, call.head)?; + let end_index = builder.next_instruction_index(); + builder.set_branch_target(failed_index, end_index)?; + builder.end_loop(end_index, loop_index)?; + + // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's + // empty. + builder.mark_register(io_reg)?; builder.load_empty(io_reg)?; Ok(()) @@ -579,13 +605,16 @@ pub(crate) fn compile_while( ) -> Result<(), CompileError> { // Pseudocode: // - // LOOP: drain %io_reg - // %io_reg <- + // drop %io_reg + // LOOP: %io_reg <- // branch-if %io_reg, TRUE // jump FALSE // TRUE: ...... + // check-external-failed %failed_reg, %io_reg + // drain %io_reg + // branch-if %failed_reg, FALSE // jump LOOP - // FALSE: + // FALSE: drop %io_reg let invalid = || CompileError::InvalidKeywordCall { keyword: "while".into(), span: call.head, @@ -596,7 +625,10 @@ pub(crate) fn compile_while( let block_id = block_arg.as_block().ok_or_else(invalid)?; let block = working_set.get_block(block_id); - let loop_index = builder.drain(io_reg, call.head)?; + builder.begin_loop(); + builder.load_empty(io_reg)?; + + let loop_index = builder.next_instruction_index(); compile_expression( working_set, @@ -621,10 +653,29 @@ pub(crate) fn compile_while( io_reg, )?; + // We have to check the result for a failed external, and drain the output on each iteration + let failed_reg = builder.next_register()?; + builder.push( + Instruction::CheckExternalFailed { + dst: failed_reg, + src: io_reg, + } + .into_spanned(call.head), + )?; + builder.drain(io_reg, call.head)?; + let failed_index = builder.branch_if_placeholder(failed_reg, call.head)?; + builder.jump(loop_index, call.head)?; - builder.set_branch_target(jump_false_index, builder.next_instruction_index())?; + let end_index = builder.next_instruction_index(); + builder.set_branch_target(jump_false_index, end_index)?; + builder.set_branch_target(failed_index, end_index)?; + builder.end_loop(end_index, loop_index)?; + + // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's + // empty. + builder.mark_register(io_reg)?; builder.load_empty(io_reg)?; Ok(()) @@ -644,9 +695,11 @@ pub(crate) fn compile_for( // LOOP: iterate %io_reg, %stream_reg, END // store-variable $var, %io_reg // %io_reg <- <...block...> + // check-external-failed %failed_reg, %io_reg // drain %io_reg + // branch-if %failed_reg, END // jump LOOP - // END: + // END: drop %io_reg let invalid = || CompileError::InvalidKeywordCall { keyword: "for".into(), span: call.head, @@ -670,6 +723,9 @@ pub(crate) fn compile_for( // Ensure io_reg is marked so we don't use it builder.mark_register(io_reg)?; + // Set up loop state + builder.begin_loop(); + let stream_reg = builder.next_register()?; compile_expression( @@ -711,24 +767,90 @@ pub(crate) fn compile_for( io_reg, )?; + // Check for failed external + let failed_reg = builder.next_register()?; + builder.push( + Instruction::CheckExternalFailed { + dst: failed_reg, + src: io_reg, + } + .into_spanned(call.head), + )?; + // Drain the output builder.drain(io_reg, call.head)?; + // End the loop if there was a failure + let failed_index = builder.branch_if_placeholder(failed_reg, call.head)?; + // Loop back to iterate to get the next value builder.jump(iterate_index, call.head)?; // Update the iterate target to the end of the loop - let target_index = builder.next_instruction_index(); - builder.set_branch_target(iterate_index, target_index)?; + let end_index = builder.next_instruction_index(); + builder.set_branch_target(failed_index, end_index)?; + builder.set_branch_target(iterate_index, end_index)?; + + // Handle break/continue + builder.end_loop(end_index, iterate_index)?; // We don't need stream_reg anymore, after the loop - // io_reg is guaranteed to be Empty due to the iterate instruction before + // io_reg may or may not be empty, so be sure it is builder.free_register(stream_reg)?; + builder.mark_register(io_reg)?; builder.load_empty(io_reg)?; Ok(()) } +/// Compile a call to `break`. +pub(crate) fn compile_break( + _working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + if builder.is_in_loop() { + builder.load_empty(io_reg)?; + builder.push_break(call.head)?; + } else { + // Fall back to calling the command if we can't find the loop target statically + builder.push( + Instruction::Call { + decl_id: call.decl_id, + src_dst: io_reg, + } + .into_spanned(call.head), + )?; + } + Ok(()) +} + +/// Compile a call to `continue`. +pub(crate) fn compile_continue( + _working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + _redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + if builder.is_in_loop() { + builder.load_empty(io_reg)?; + builder.push_continue(call.head)?; + } else { + // Fall back to calling the command if we can't find the loop target statically + builder.push( + Instruction::Call { + decl_id: call.decl_id, + src_dst: io_reg, + } + .into_spanned(call.head), + )?; + } + Ok(()) +} + /// Compile a call to `return` as a `return` instruction. /// /// This is not strictly necessary, but it is more efficient. diff --git a/crates/nu-protocol/src/errors/compile_error.rs b/crates/nu-protocol/src/errors/compile_error.rs index 134970ea9c..b138ddc60f 100644 --- a/crates/nu-protocol/src/errors/compile_error.rs +++ b/crates/nu-protocol/src/errors/compile_error.rs @@ -136,6 +136,7 @@ pub enum CompileError { }, #[error("Missing required declaration: `{decl_name}`")] + #[diagnostic(code(nu::compile::missing_required_declaration))] MissingRequiredDeclaration { decl_name: String, #[label("`{decl_name}` must be in scope to compile this expression")] @@ -143,9 +144,18 @@ pub enum CompileError { }, #[error("Invalid literal")] + #[diagnostic(code(nu::compile::invalid_literal))] InvalidLiteral { msg: String, #[label("{msg}")] span: Span, }, + + #[error("{msg}")] + #[diagnostic(code(nu::compile::not_in_a_loop))] + NotInALoop { + msg: String, + #[label("can't be used outside of a loop")] + span: Option, + }, }