From 5d083cdaee2715b8b502a75780dd77ddd4768f4a Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Tue, 9 Jul 2024 17:08:03 -0700 Subject: [PATCH] make instruction builder use labels instead of adjusting placeholder instructions, add IR comments for debug aid --- crates/nu-engine/src/compile/builder.rs | 276 ++++++++++-------- crates/nu-engine/src/compile/keyword.rs | 158 +++++----- crates/nu-engine/src/compile/mod.rs | 2 +- crates/nu-engine/src/compile/operator.rs | 37 ++- .../nu-protocol/src/errors/compile_error.rs | 28 +- crates/nu-protocol/src/ir/display.rs | 11 +- crates/nu-protocol/src/ir/mod.rs | 52 ++++ 7 files changed, 351 insertions(+), 213 deletions(-) diff --git a/crates/nu-engine/src/compile/builder.rs b/crates/nu-engine/src/compile/builder.rs index c10df242b7..0d4d557858 100644 --- a/crates/nu-engine/src/compile/builder.rs +++ b/crates/nu-engine/src/compile/builder.rs @@ -3,16 +3,27 @@ use nu_protocol::{ CompileError, IntoSpanned, RegId, Span, Spanned, }; +/// A label identifier. Only exists while building code. Replaced with the actual target. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct LabelId(pub usize); + /// Builds [`IrBlock`]s progressively by consuming instructions and handles register allocation. #[derive(Debug)] pub(crate) struct BlockBuilder { pub(crate) instructions: Vec, pub(crate) spans: Vec, + /// The actual instruction index that a label refers to. While building IR, branch targets are + /// specified as indices into this array rather than the true instruction index. This makes it + /// easier to make modifications to code, as just this array needs to be changed, and it's also + /// less error prone as during `finish()` we check to make sure all of the used labels have had + /// an index actually set. + pub(crate) labels: Vec>, pub(crate) data: Vec, pub(crate) ast: Vec>, + pub(crate) comments: Vec, pub(crate) register_allocation_state: Vec, pub(crate) file_count: u32, - pub(crate) loop_stack: Vec, + pub(crate) loop_stack: Vec, } impl BlockBuilder { @@ -21,8 +32,10 @@ impl BlockBuilder { BlockBuilder { instructions: vec![], spans: vec![], + labels: vec![], data: vec![], ast: vec![], + comments: vec![], register_allocation_state: vec![true], file_count: 0, loop_stack: vec![], @@ -91,15 +104,34 @@ impl BlockBuilder { } } + /// Define a label, which can be used by branch instructions. The target can optionally be + /// specified now. + pub(crate) fn label(&mut self, target_index: Option) -> LabelId { + let label_id = self.labels.len(); + self.labels.push(target_index); + LabelId(label_id) + } + + /// Change the target of a label. + pub(crate) fn set_label( + &mut self, + label_id: LabelId, + target_index: usize, + ) -> Result<(), CompileError> { + *self + .labels + .get_mut(label_id.0) + .ok_or(CompileError::UndefinedLabel { + label_id: label_id.0, + span: None, + })? = Some(target_index); + Ok(()) + } + /// Insert an instruction into the block, automatically marking any registers populated by /// the instruction, and freeing any registers consumed by the instruction. - /// - /// Returns the offset of the inserted instruction. #[track_caller] - pub(crate) fn push( - &mut self, - instruction: Spanned, - ) -> Result { + pub(crate) fn push(&mut self, instruction: Spanned) -> Result<(), CompileError> { // Free read registers, and mark write registers. // // If a register is both read and written, it should be on both sides, so that we can verify @@ -258,11 +290,11 @@ impl BlockBuilder { Err(err) => return Err(err), } - let index = self.next_instruction_index(); self.instructions.push(instruction.item); self.spans.push(instruction.span); self.ast.push(None); - Ok(index) + self.comments.push(String::new()); + Ok(()) } /// Set the AST of the last instruction. Separate method because it's rarely used. @@ -270,6 +302,14 @@ impl BlockBuilder { *self.ast.last_mut().expect("no last instruction") = ast_ref; } + /// Add a comment to the last instruction. + pub(crate) fn add_comment(&mut self, comment: impl std::fmt::Display) { + add_comment( + self.comments.last_mut().expect("no last instruction"), + comment, + ) + } + /// Load a register with a literal. pub(crate) fn load_literal( &mut self, @@ -308,7 +348,7 @@ impl BlockBuilder { } /// Drain the stream in a register (fully consuming it) - pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result { + pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<(), CompileError> { self.push(Instruction::Drain { src }.into_spanned(span)) } @@ -341,83 +381,41 @@ impl BlockBuilder { pub(crate) fn branch_if( &mut self, cond: RegId, - index: usize, + label_id: LabelId, span: Span, - ) -> Result { - self.push(Instruction::BranchIf { cond, index }.into_spanned(span)) - } - - /// Add a placeholder `branch-if` instruction, which must be updated with - /// [`.set_branch_target()`] - pub(crate) fn branch_if_placeholder( - &mut self, - cond: RegId, - span: Span, - ) -> Result { - self.branch_if(cond, usize::MAX, span) + ) -> Result<(), CompileError> { + self.push( + Instruction::BranchIf { + cond, + index: label_id.0, + } + .into_spanned(span), + ) } /// Add a `branch-if-empty` instruction pub(crate) fn branch_if_empty( &mut self, src: RegId, - index: usize, + label_id: LabelId, span: Span, - ) -> Result { - self.push(Instruction::BranchIfEmpty { src, index }.into_spanned(span)) + ) -> Result<(), CompileError> { + self.push( + Instruction::BranchIfEmpty { + src, + index: label_id.0, + } + .into_spanned(span), + ) } /// Add a `jump` instruction - pub(crate) fn jump(&mut self, index: usize, span: Span) -> Result { - self.push(Instruction::Jump { index }.into_spanned(span)) - } - - /// Add a placeholder `jump` instruction, which must be updated with [`.set_branch_target()`] - pub(crate) fn jump_placeholder(&mut self, span: Span) -> Result { - self.jump(usize::MAX, span) - } - - /// Modify a branching instruction's branch target `index` - #[track_caller] - pub(crate) fn set_branch_target( - &mut self, - instruction_index: usize, - target_index: usize, - ) -> Result<(), CompileError> { - match self.instructions.get_mut(instruction_index) { - Some( - Instruction::BranchIf { index, .. } - | Instruction::BranchIfEmpty { index, .. } - | Instruction::Jump { index } - | Instruction::Match { index, .. } - | Instruction::Iterate { - end_index: index, .. - } - | Instruction::OnError { index } - | Instruction::OnErrorInto { index, .. }, - ) => { - *index = target_index; - Ok(()) - } - Some(_) => { - let other = &self.instructions[instruction_index]; - - log::warn!("set branch target failed ({instruction_index} => {target_index}), target instruction = {other:?}, builder = {self:#?}"); - - Err(CompileError::SetBranchTargetOfNonBranchInstruction { - instruction: format!("{other:?}"), - span: self.spans[instruction_index], - caller: std::panic::Location::caller().to_string(), - }) - } - None => Err(CompileError::InstructionIndexOutOfRange { - index: instruction_index, - }), - } + pub(crate) fn jump(&mut self, label_id: LabelId, span: Span) -> Result<(), CompileError> { + self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span)) } /// The index that the next instruction [`.push()`]ed will have. - pub(crate) fn next_instruction_index(&self) -> usize { + pub(crate) fn here(&self) -> usize { self.instructions.len() } @@ -431,9 +429,14 @@ impl BlockBuilder { Ok(next) } - /// Push a new loop state onto the builder. - pub(crate) fn begin_loop(&mut self) { - self.loop_stack.push(LoopState::new()); + /// Push a new loop state onto the builder. Creates new labels that must be set. + pub(crate) fn begin_loop(&mut self) -> Loop { + let loop_ = Loop { + break_label: self.label(None), + continue_label: self.label(None), + }; + self.loop_stack.push(loop_); + loop_ } /// True if we are currently in a loop. @@ -442,41 +445,32 @@ impl BlockBuilder { } /// Add a loop breaking jump instruction. - pub(crate) fn push_break(&mut self, span: Span) -> Result { - let index = self.jump_placeholder(span)?; - self.loop_stack - .last_mut() + pub(crate) fn push_break(&mut self, span: Span) -> Result<(), CompileError> { + let loop_ = self + .loop_stack + .last() .ok_or_else(|| CompileError::NotInALoop { msg: "`break` called from outside of a loop".into(), span: Some(span), - })? - .break_branches - .push(index); - Ok(index) + })?; + self.jump(loop_.break_label, span) } /// Add a loop continuing jump instruction. - pub(crate) fn push_continue(&mut self, span: Span) -> Result { - let index = self.jump_placeholder(span)?; - self.loop_stack - .last_mut() + pub(crate) fn push_continue(&mut self, span: Span) -> Result<(), CompileError> { + let loop_ = self + .loop_stack + .last() .ok_or_else(|| CompileError::NotInALoop { msg: "`continue` called from outside of a loop".into(), span: Some(span), - })? - .continue_branches - .push(index); - Ok(index) + })?; + self.jump(loop_.continue_label, span) } - /// Pop the loop state and set any `break` or `continue` instructions to their appropriate - /// target instruction indexes. - pub(crate) fn end_loop( - &mut self, - break_target_index: usize, - continue_target_index: usize, - ) -> Result<(), CompileError> { - let loop_state = self + /// Pop the loop state. Checks that the loop being ended is the same one that was expected. + pub(crate) fn end_loop(&mut self, loop_: Loop) -> Result<(), CompileError> { + let ended_loop = self .loop_stack .pop() .ok_or_else(|| CompileError::NotInALoop { @@ -484,50 +478,86 @@ impl BlockBuilder { span: None, })?; - for break_index in loop_state.break_branches { - self.set_branch_target(break_index, break_target_index)?; + if ended_loop == loop_ { + Ok(()) + } else { + Err(CompileError::IncoherentLoopState) } - for continue_index in loop_state.continue_branches { - self.set_branch_target(continue_index, continue_target_index)?; - } - - Ok(()) } /// Mark an unreachable code path. Produces an error at runtime if executed. - pub(crate) fn unreachable(&mut self, span: Span) -> Result { + pub(crate) fn unreachable(&mut self, span: Span) -> Result<(), CompileError> { self.push(Instruction::Unreachable.into_spanned(span)) } /// Consume the builder and produce the final [`IrBlock`]. - pub(crate) fn finish(self) -> IrBlock { - IrBlock { + pub(crate) fn finish(mut self) -> Result { + // Add comments to label targets + for (index, label_target) in self.labels.iter().enumerate() { + if let Some(label_target) = label_target { + add_comment( + &mut self.comments[*label_target], + format_args!("label({index})"), + ); + } + } + + // Populate the actual target indices of labels into the instructions + for ((index, instruction), span) in + self.instructions.iter_mut().enumerate().zip(&self.spans) + { + if let Some(label_id) = instruction.branch_target() { + let target_index = self.labels.get(label_id).cloned().flatten().ok_or( + CompileError::UndefinedLabel { + label_id, + span: Some(*span), + }, + )?; + // Add a comment to the target index that we come from here + add_comment( + &mut self.comments[target_index], + format_args!("from({index}:)"), + ); + instruction.set_branch_target(target_index).map_err(|_| { + CompileError::SetBranchTargetOfNonBranchInstruction { + instruction: format!("{:?}", instruction), + span: *span, + } + })?; + } + } + + Ok(IrBlock { instructions: self.instructions, spans: self.spans, data: self.data.into(), ast: self.ast, + comments: self.comments.into_iter().map(|s| s.into()).collect(), register_count: self .register_allocation_state .len() .try_into() .expect("register count overflowed in finish() despite previous checks"), file_count: self.file_count, - } + }) } } -/// Keeps track of `break` and `continue` branches that need to be set up after a loop is compiled. -#[derive(Debug)] -pub(crate) struct LoopState { - break_branches: Vec, - continue_branches: Vec, +/// Keeps track of the `break` and `continue` target labels for a loop. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct Loop { + pub(crate) break_label: LabelId, + pub(crate) continue_label: LabelId, } -impl LoopState { - pub(crate) const fn new() -> Self { - LoopState { - break_branches: vec![], - continue_branches: vec![], - } - } +/// Add a new comment to an existing one +fn add_comment(comment: &mut String, new_comment: impl std::fmt::Display) { + use std::fmt::Write; + write!( + comment, + "{}{}", + if comment.is_empty() { "" } else { ", " }, + new_comment + ) + .expect("formatting failed"); } diff --git a/crates/nu-engine/src/compile/keyword.rs b/crates/nu-engine/src/compile/keyword.rs index 6cd4d3e7f8..75b3ab5523 100644 --- a/crates/nu-engine/src/compile/keyword.rs +++ b/crates/nu-engine/src/compile/keyword.rs @@ -36,6 +36,10 @@ pub(crate) fn compile_if( let true_block_id = true_block_arg.as_block().ok_or_else(invalid)?; let true_block = working_set.get_block(true_block_id); + let true_label = builder.label(None); + let false_label = builder.label(None); + let end_label = builder.label(None); + let not_condition_reg = { // Compile the condition first let condition_reg = builder.next_register()?; @@ -59,10 +63,12 @@ pub(crate) fn compile_if( condition_reg }; - // Set up a branch if the condition is false. Will go back and fix this to the right offset - let index_of_branch_if = builder.branch_if_placeholder(not_condition_reg, call.head)?; + // Set up a branch if the condition is false. + builder.branch_if(not_condition_reg, false_label, call.head)?; + builder.add_comment("if false"); // Compile the true case + builder.set_label(true_label, builder.here())?; compile_block( working_set, builder, @@ -73,12 +79,11 @@ pub(crate) fn compile_if( )?; // Add a jump over the false case - let index_of_jump = builder.jump_placeholder(else_arg.map(|e| e.span).unwrap_or(call.head))?; - - // Change the branch-if target to after the jump - builder.set_branch_target(index_of_branch_if, index_of_jump + 1)?; + builder.jump(end_label, else_arg.map(|e| e.span).unwrap_or(call.head))?; + builder.add_comment("end if"); // On the else side now, assert that io_reg is still valid + builder.set_label(false_label, builder.here())?; builder.mark_register(io_reg)?; if let Some(else_arg) = else_arg { @@ -125,8 +130,8 @@ pub(crate) fn compile_if( builder.load_empty(io_reg)?; } - // Change the jump target to the next index (out of the if-else) - builder.set_branch_target(index_of_jump, builder.next_instruction_index())?; + // Set the end label + builder.set_label(end_label, builder.here())?; Ok(()) } @@ -185,34 +190,41 @@ pub(crate) fn compile_match( builder.push(Instruction::Collect { src_dst: match_reg }.into_spanned(match_expr.span))?; // Generate the `match` instructions. Guards are not used at this stage. - let match_offset = builder.next_instruction_index(); + let mut match_labels = Vec::with_capacity(match_block.len()); + let mut next_labels = Vec::with_capacity(match_block.len()); + let end_label = builder.label(None); for (pattern, _) in match_block { + let match_label = builder.label(None); + match_labels.push(match_label); builder.push( Instruction::Match { pattern: Box::new(pattern.pattern.clone()), src: match_reg, - index: usize::MAX, // placeholder + index: match_label.0, } .into_spanned(pattern.span), )?; + // Also add a label for the next match instruction or failure case + next_labels.push(builder.label(Some(builder.here()))); } - let mut end_jumps = Vec::with_capacity(match_block.len() + 1); - // Match fall-through to jump to the end, if no match builder.load_empty(io_reg)?; builder.drop_reg(match_reg)?; - end_jumps.push(builder.jump_placeholder(call.head)?); + builder.jump(end_label, call.head)?; // Generate each of the match expressions. Handle guards here, if present. for (index, (pattern, expr)) in match_block.iter().enumerate() { + let match_label = match_labels[index]; + let next_label = next_labels[index]; + // `io_reg` and `match_reg` are still valid at each of these branch targets builder.mark_register(io_reg)?; builder.mark_register(match_reg)?; // Set the original match instruction target here - builder.set_branch_target(match_offset + index, builder.next_instruction_index())?; + builder.set_label(match_label, builder.here())?; // Handle guard, if present if let Some(guard) = &pattern.guard { @@ -229,19 +241,16 @@ pub(crate) fn compile_match( .push(Instruction::CheckMatchGuard { src: guard_reg }.into_spanned(guard.span))?; builder.push(Instruction::Not { src_dst: guard_reg }.into_spanned(guard.span))?; // Branch to the next match instruction if the branch fails to match - builder.push( - Instruction::BranchIf { - cond: guard_reg, - index: match_offset + index + 1, - } - .into_spanned( - // Span the branch with the next pattern, or the head if this is the end - match_block - .get(index + 1) - .map(|b| b.0.span) - .unwrap_or(call.head), - ), + builder.branch_if( + guard_reg, + next_label, + // Span the branch with the next pattern, or the head if this is the end + match_block + .get(index + 1) + .map(|b| b.0.span) + .unwrap_or(call.head), )?; + builder.add_comment("if match guard false"); } // match_reg no longer needed, successful match @@ -269,14 +278,13 @@ pub(crate) fn compile_match( )?; } - // Rewrite this jump to the end afterward - end_jumps.push(builder.jump_placeholder(call.head)?); + // Jump to the end after the match logic is done + builder.jump(end_label, call.head)?; + builder.add_comment("end match"); } - // Rewrite the end jumps to the next instruction - for index in end_jumps { - builder.set_branch_target(index, builder.next_instruction_index())?; - } + // Set the end destination + builder.set_label(end_label, builder.here())?; Ok(()) } @@ -334,6 +342,7 @@ pub(crate) fn compile_let( } .into_spanned(call.head), )?; + builder.add_comment("let"); // Don't forget to set io_reg to Empty afterward, as that's the result of an assignment builder.load_empty(io_reg)?; @@ -395,6 +404,10 @@ pub(crate) fn compile_try( }; let catch_span = catch_expr.map(|e| e.span).unwrap_or(call.head); + let err_label = builder.label(None); + let failed_label = builder.label(None); + let end_label = builder.label(None); + // We have two ways of executing `catch`: if it was provided as a literal, we can inline it. // Otherwise, we have to evaluate the expression and keep it as a register, and then call `do`. enum CatchType<'a> { @@ -430,21 +443,23 @@ pub(crate) fn compile_try( }) .transpose()?; - // Put the error handler placeholder. If we have a catch expression then we should capture the + // Put the error handler instruction. If we have a catch expression then we should capture the // error. - let error_handler_index = if catch_type.is_some() { + if catch_type.is_some() { builder.push( Instruction::OnErrorInto { - index: usize::MAX, + index: err_label.0, dst: io_reg, } .into_spanned(call.head), )? } else { // Otherwise, we don't need the error value. - builder.push(Instruction::OnError { index: usize::MAX }.into_spanned(call.head))? + builder.push(Instruction::OnError { index: err_label.0 }.into_spanned(call.head))? }; + builder.add_comment("try"); + // Compile the block compile_block( working_set, @@ -464,23 +479,24 @@ pub(crate) fn compile_try( } .into_spanned(catch_span), )?; - let external_failed_branch = builder.branch_if_placeholder(failed_reg, catch_span)?; + builder.branch_if(failed_reg, failed_label, catch_span)?; // Successful case: pop the error handler builder.push(Instruction::PopErrorHandler.into_spanned(call.head))?; // Jump over the failure case - let jump_index = builder.jump_placeholder(catch_span)?; + builder.jump(end_label, catch_span)?; // Set up an error handler preamble for failed external. // Draining the %io_reg results in the error handler being called with Empty, and sets // $env.LAST_EXIT_CODE - builder.set_branch_target(external_failed_branch, builder.next_instruction_index())?; + builder.set_label(failed_label, builder.here())?; builder.drain(io_reg, catch_span)?; + builder.add_comment("branches to err"); builder.unreachable(catch_span)?; - // This is the real error handler - go back and set the right branch destination - builder.set_branch_target(error_handler_index, builder.next_instruction_index())?; + // This is the real error handler + builder.set_label(err_label, builder.here())?; // Mark out register as likely not clean - state in error handler is not well defined builder.mark_register(io_reg)?; @@ -559,7 +575,7 @@ pub(crate) fn compile_try( } // This is the end - if we succeeded, should jump here - builder.set_branch_target(jump_index, builder.next_instruction_index())?; + builder.set_label(end_label, builder.here())?; Ok(()) } @@ -588,10 +604,10 @@ pub(crate) fn compile_loop( let block_id = block_arg.as_block().ok_or_else(invalid)?; let block = working_set.get_block(block_id); - builder.begin_loop(); + let loop_ = builder.begin_loop(); builder.load_empty(io_reg)?; - let loop_index = builder.next_instruction_index(); + builder.set_label(loop_.continue_label, builder.here())?; compile_block( working_set, @@ -605,10 +621,11 @@ pub(crate) fn compile_loop( // Drain the output, just like for a semicolon builder.drain(io_reg, call.head)?; - builder.jump(loop_index, call.head)?; + builder.jump(loop_.continue_label, call.head)?; + builder.add_comment("loop"); - let end_index = builder.next_instruction_index(); - builder.end_loop(end_index, loop_index)?; + builder.set_label(loop_.break_label, builder.here())?; + builder.end_loop(loop_)?; // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's // empty. @@ -645,9 +662,10 @@ pub(crate) fn compile_while( let block_id = block_arg.as_block().ok_or_else(invalid)?; let block = working_set.get_block(block_id); - builder.begin_loop(); + let loop_ = builder.begin_loop(); + builder.set_label(loop_.continue_label, builder.here())?; - let loop_index = builder.next_instruction_index(); + let true_label = builder.label(None); compile_expression( working_set, @@ -658,10 +676,12 @@ pub(crate) fn compile_while( io_reg, )?; - let branch_true_index = builder.branch_if_placeholder(io_reg, call.head)?; - let jump_false_index = builder.jump_placeholder(call.head)?; + builder.branch_if(io_reg, true_label, call.head)?; + builder.add_comment("while"); + builder.jump(loop_.break_label, call.head)?; + builder.add_comment("end while"); - builder.set_branch_target(branch_true_index, builder.next_instruction_index())?; + builder.set_label(true_label, builder.here())?; compile_block( working_set, @@ -675,12 +695,11 @@ pub(crate) fn compile_while( // Drain the result, just like for a semicolon builder.drain(io_reg, call.head)?; - builder.jump(loop_index, call.head)?; + builder.jump(loop_.continue_label, call.head)?; + builder.add_comment("while"); - let end_index = builder.next_instruction_index(); - builder.set_branch_target(jump_false_index, end_index)?; - - builder.end_loop(end_index, loop_index)?; + builder.set_label(loop_.break_label, builder.here())?; + builder.end_loop(loop_)?; // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's // empty. @@ -730,9 +749,6 @@ pub(crate) fn compile_for( // Ensure io_reg is marked so we don't use it builder.mark_register(io_reg)?; - // Set up loop state - builder.begin_loop(); - let stream_reg = builder.next_register()?; compile_expression( @@ -744,16 +760,21 @@ pub(crate) fn compile_for( stream_reg, )?; + // Set up loop state + let loop_ = builder.begin_loop(); + builder.set_label(loop_.continue_label, builder.here())?; + // This gets a value from the stream each time it's executed // io_reg basically will act as our scratch register here - let iterate_index = builder.push( + builder.push( Instruction::Iterate { dst: io_reg, stream: stream_reg, - end_index: usize::MAX, // placeholder + end_index: loop_.break_label.0, } .into_spanned(call.head), )?; + builder.add_comment("for"); // Put the received value in the variable builder.push( @@ -778,14 +799,11 @@ pub(crate) fn compile_for( builder.drain(io_reg, call.head)?; // Loop back to iterate to get the next value - builder.jump(iterate_index, call.head)?; + builder.jump(loop_.continue_label, call.head)?; - // Update the iterate target to the end of the loop - let end_index = builder.next_instruction_index(); - builder.set_branch_target(iterate_index, end_index)?; - - // Handle break/continue - builder.end_loop(end_index, iterate_index)?; + // Set the end of the loop + builder.set_label(loop_.break_label, builder.here())?; + builder.end_loop(loop_)?; // We don't need stream_reg anymore, after the loop // io_reg may or may not be empty, so be sure it is @@ -807,6 +825,7 @@ pub(crate) fn compile_break( if builder.is_in_loop() { builder.load_empty(io_reg)?; builder.push_break(call.head)?; + builder.add_comment("break"); } else { // Fall back to calling the command if we can't find the loop target statically builder.push( @@ -831,6 +850,7 @@ pub(crate) fn compile_continue( if builder.is_in_loop() { builder.load_empty(io_reg)?; builder.push_continue(call.head)?; + builder.add_comment("continue"); } else { // Fall back to calling the command if we can't find the loop target statically builder.push( diff --git a/crates/nu-engine/src/compile/mod.rs b/crates/nu-engine/src/compile/mod.rs index 98cc205975..c608646868 100644 --- a/crates/nu-engine/src/compile/mod.rs +++ b/crates/nu-engine/src/compile/mod.rs @@ -39,7 +39,7 @@ pub fn compile(working_set: &StateWorkingSet, block: &Block) -> Result { - builder.branch_if(lhs_reg, builder.next_instruction_index() + 2, op.span)?; + let true_label = builder.label(None); + builder.branch_if(lhs_reg, true_label, op.span)?; // If the branch was not taken it's false, so short circuit to load false - let jump_false = builder.jump_placeholder(op.span)?; + let false_label = builder.label(None); + builder.jump(false_label, op.span)?; + builder.set_label(true_label, builder.here())?; compile_expression( working_set, builder, @@ -75,16 +78,18 @@ pub(crate) fn compile_binary_op( lhs_reg, )?; - let jump_end = builder.jump_placeholder(op.span)?; + let end_label = builder.label(None); + builder.jump(end_label, op.span)?; // Consumed by `branch-if`, so we have to set it false again - builder.set_branch_target(jump_false, builder.next_instruction_index())?; + builder.set_label(false_label, builder.here())?; builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?; - builder.set_branch_target(jump_end, builder.next_instruction_index())?; + builder.set_label(end_label, builder.here())?; } Operator::Boolean(Boolean::Or) => { - let branch_true = builder.branch_if_placeholder(lhs_reg, op.span)?; + let true_label = builder.label(None); + builder.branch_if(lhs_reg, true_label, op.span)?; // If the branch was not taken it's false, so do the right-side expression compile_expression( @@ -96,13 +101,14 @@ pub(crate) fn compile_binary_op( lhs_reg, )?; - let jump_end = builder.jump_placeholder(op.span)?; + let end_label = builder.label(None); + builder.jump(end_label, op.span)?; // Consumed by `branch-if`, so we have to set it true again - builder.set_branch_target(branch_true, builder.next_instruction_index())?; + builder.set_label(true_label, builder.here())?; builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?; - builder.set_branch_target(jump_end, builder.next_instruction_index())?; + builder.set_label(end_label, builder.here())?; } _ => { // Any other operator, via `binary-op` @@ -219,18 +225,19 @@ pub(crate) fn compile_assignment( )?; // Default to empty record so we can do further upserts - builder.branch_if_empty( - head_reg, - builder.next_instruction_index() + 2, - assignment_span, - )?; - builder.jump(builder.next_instruction_index() + 2, assignment_span)?; + let default_label = builder.label(None); + let upsert_label = builder.label(None); + builder.branch_if_empty(head_reg, default_label, assignment_span)?; + builder.jump(upsert_label, assignment_span)?; + + builder.set_label(default_label, builder.here())?; builder.load_literal( head_reg, Literal::Record { capacity: 0 }.into_spanned(lhs.span), )?; // Do the upsert on the current value to incorporate rhs + builder.set_label(upsert_label, builder.here())?; compile_upsert_cell_path( builder, (&path.tail[1..]).into_spanned(lhs.span), diff --git a/crates/nu-protocol/src/errors/compile_error.rs b/crates/nu-protocol/src/errors/compile_error.rs index 96093a25bc..9214289f42 100644 --- a/crates/nu-protocol/src/errors/compile_error.rs +++ b/crates/nu-protocol/src/errors/compile_error.rs @@ -84,13 +84,12 @@ pub enum CompileError { #[error("Attempted to set branch target of non-branch instruction.")] #[diagnostic( code(nu::compile::set_branch_target_of_non_branch_instruction), - help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"), )] SetBranchTargetOfNonBranchInstruction { instruction: String, #[label("tried to modify: {instruction}")] span: Span, - caller: String, }, #[error("Instruction index out of range: {index}.")] @@ -207,4 +206,29 @@ pub enum CompileError { #[label("can't be used outside of a loop")] span: Option, }, + + #[error("Incoherent loop state: the loop that ended was not the one we were expecting.")] + #[diagnostic( + code(nu::compile::incoherent_loop_state), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"), + )] + IncoherentLoopState, + + #[error("Undefined label `{label_id}`.")] + #[diagnostic( + code(nu::compile::undefined_label), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"), + )] + UndefinedLabel { + label_id: usize, + #[label("label was used while compiling this code")] + span: Option, + }, + + #[error("Offset overflow: tried to add {offset} to {here}.")] + #[diagnostic( + code(nu::compile::offset_overflow), + help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"), + )] + OffsetOverflow { here: usize, offset: isize }, } diff --git a/crates/nu-protocol/src/ir/display.rs b/crates/nu-protocol/src/ir/display.rs index 7da914b4c1..c28323cca4 100644 --- a/crates/nu-protocol/src/ir/display.rs +++ b/crates/nu-protocol/src/ir/display.rs @@ -31,8 +31,7 @@ impl<'a> fmt::Display for FmtIrBlock<'a> { )?; } for (index, instruction) in self.ir_block.instructions.iter().enumerate() { - writeln!( - f, + let formatted = format!( "{:-4}: {}", index, FmtInstruction { @@ -40,7 +39,13 @@ impl<'a> fmt::Display for FmtIrBlock<'a> { instruction, data: &self.ir_block.data, } - )?; + ); + let comment = &self.ir_block.comments[index]; + if comment.is_empty() { + writeln!(f, "{formatted}")?; + } else { + writeln!(f, "{formatted:40} # {comment}")?; + } } Ok(()) } diff --git a/crates/nu-protocol/src/ir/mod.rs b/crates/nu-protocol/src/ir/mod.rs index 8cccb6c5cf..28677b743c 100644 --- a/crates/nu-protocol/src/ir/mod.rs +++ b/crates/nu-protocol/src/ir/mod.rs @@ -22,6 +22,8 @@ pub struct IrBlock { #[serde(with = "serde_arc_u8_array")] pub data: Arc<[u8]>, pub ast: Vec>, + /// Additional information that can be added to help with debugging + pub comments: Vec>, pub register_count: u32, pub file_count: u32, } @@ -33,6 +35,7 @@ impl fmt::Debug for IrBlock { .field("instructions", &self.instructions) .field("spans", &self.spans) .field("data", &self.data) + .field("comments", &self.comments) .field("register_count", &self.register_count) .field("file_count", &self.register_count) .finish_non_exhaustive() @@ -272,6 +275,55 @@ impl Instruction { data, } } + + /// Returns the branch target index of the instruction if this is a branching instruction. + pub fn branch_target(&self) -> Option { + match self { + Instruction::Jump { index } => Some(*index), + Instruction::BranchIf { cond: _, index } => Some(*index), + Instruction::BranchIfEmpty { src: _, index } => Some(*index), + Instruction::Match { + pattern: _, + src: _, + index, + } => Some(*index), + + Instruction::Iterate { + dst: _, + stream: _, + end_index, + } => Some(*end_index), + Instruction::OnError { index } => Some(*index), + Instruction::OnErrorInto { index, dst: _ } => Some(*index), + _ => None, + } + } + + /// Sets the branch target of the instruction if this is a branching instruction. + /// + /// Returns `Err(target_index)` if it isn't a branching instruction. + pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> { + match self { + Instruction::Jump { index } => *index = target_index, + Instruction::BranchIf { cond: _, index } => *index = target_index, + Instruction::BranchIfEmpty { src: _, index } => *index = target_index, + Instruction::Match { + pattern: _, + src: _, + index, + } => *index = target_index, + + Instruction::Iterate { + dst: _, + stream: _, + end_index, + } => *end_index = target_index, + Instruction::OnError { index } => *index = target_index, + Instruction::OnErrorInto { index, dst: _ } => *index = target_index, + _ => return Err(target_index), + } + Ok(()) + } } // This is to document/enforce the size of `Instruction` in bytes.