make instruction builder use labels instead of adjusting placeholder instructions, add IR comments for debug aid

This commit is contained in:
Devyn Cairns 2024-07-09 17:08:03 -07:00
parent 23fc90a616
commit 5d083cdaee
7 changed files with 351 additions and 213 deletions

View File

@ -3,16 +3,27 @@ use nu_protocol::{
CompileError, IntoSpanned, RegId, Span, Spanned, CompileError, IntoSpanned, RegId, Span, Spanned,
}; };
/// A label identifier. Only exists while building code. Replaced with the actual target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LabelId(pub usize);
/// Builds [`IrBlock`]s progressively by consuming instructions and handles register allocation. /// Builds [`IrBlock`]s progressively by consuming instructions and handles register allocation.
#[derive(Debug)] #[derive(Debug)]
pub(crate) struct BlockBuilder { pub(crate) struct BlockBuilder {
pub(crate) instructions: Vec<Instruction>, pub(crate) instructions: Vec<Instruction>,
pub(crate) spans: Vec<Span>, pub(crate) spans: Vec<Span>,
/// The actual instruction index that a label refers to. While building IR, branch targets are
/// specified as indices into this array rather than the true instruction index. This makes it
/// easier to make modifications to code, as just this array needs to be changed, and it's also
/// less error prone as during `finish()` we check to make sure all of the used labels have had
/// an index actually set.
pub(crate) labels: Vec<Option<usize>>,
pub(crate) data: Vec<u8>, pub(crate) data: Vec<u8>,
pub(crate) ast: Vec<Option<IrAstRef>>, pub(crate) ast: Vec<Option<IrAstRef>>,
pub(crate) comments: Vec<String>,
pub(crate) register_allocation_state: Vec<bool>, pub(crate) register_allocation_state: Vec<bool>,
pub(crate) file_count: u32, pub(crate) file_count: u32,
pub(crate) loop_stack: Vec<LoopState>, pub(crate) loop_stack: Vec<Loop>,
} }
impl BlockBuilder { impl BlockBuilder {
@ -21,8 +32,10 @@ impl BlockBuilder {
BlockBuilder { BlockBuilder {
instructions: vec![], instructions: vec![],
spans: vec![], spans: vec![],
labels: vec![],
data: vec![], data: vec![],
ast: vec![], ast: vec![],
comments: vec![],
register_allocation_state: vec![true], register_allocation_state: vec![true],
file_count: 0, file_count: 0,
loop_stack: vec![], loop_stack: vec![],
@ -91,15 +104,34 @@ impl BlockBuilder {
} }
} }
/// Define a label, which can be used by branch instructions. The target can optionally be
/// specified now.
pub(crate) fn label(&mut self, target_index: Option<usize>) -> LabelId {
let label_id = self.labels.len();
self.labels.push(target_index);
LabelId(label_id)
}
/// Change the target of a label.
pub(crate) fn set_label(
&mut self,
label_id: LabelId,
target_index: usize,
) -> Result<(), CompileError> {
*self
.labels
.get_mut(label_id.0)
.ok_or(CompileError::UndefinedLabel {
label_id: label_id.0,
span: None,
})? = Some(target_index);
Ok(())
}
/// Insert an instruction into the block, automatically marking any registers populated by /// Insert an instruction into the block, automatically marking any registers populated by
/// the instruction, and freeing any registers consumed by the instruction. /// the instruction, and freeing any registers consumed by the instruction.
///
/// Returns the offset of the inserted instruction.
#[track_caller] #[track_caller]
pub(crate) fn push( pub(crate) fn push(&mut self, instruction: Spanned<Instruction>) -> Result<(), CompileError> {
&mut self,
instruction: Spanned<Instruction>,
) -> Result<usize, CompileError> {
// Free read registers, and mark write registers. // Free read registers, and mark write registers.
// //
// If a register is both read and written, it should be on both sides, so that we can verify // If a register is both read and written, it should be on both sides, so that we can verify
@ -258,11 +290,11 @@ impl BlockBuilder {
Err(err) => return Err(err), Err(err) => return Err(err),
} }
let index = self.next_instruction_index();
self.instructions.push(instruction.item); self.instructions.push(instruction.item);
self.spans.push(instruction.span); self.spans.push(instruction.span);
self.ast.push(None); self.ast.push(None);
Ok(index) self.comments.push(String::new());
Ok(())
} }
/// Set the AST of the last instruction. Separate method because it's rarely used. /// Set the AST of the last instruction. Separate method because it's rarely used.
@ -270,6 +302,14 @@ impl BlockBuilder {
*self.ast.last_mut().expect("no last instruction") = ast_ref; *self.ast.last_mut().expect("no last instruction") = ast_ref;
} }
/// Add a comment to the last instruction.
pub(crate) fn add_comment(&mut self, comment: impl std::fmt::Display) {
add_comment(
self.comments.last_mut().expect("no last instruction"),
comment,
)
}
/// Load a register with a literal. /// Load a register with a literal.
pub(crate) fn load_literal( pub(crate) fn load_literal(
&mut self, &mut self,
@ -308,7 +348,7 @@ impl BlockBuilder {
} }
/// Drain the stream in a register (fully consuming it) /// Drain the stream in a register (fully consuming it)
pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<usize, CompileError> { pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<(), CompileError> {
self.push(Instruction::Drain { src }.into_spanned(span)) self.push(Instruction::Drain { src }.into_spanned(span))
} }
@ -341,83 +381,41 @@ impl BlockBuilder {
pub(crate) fn branch_if( pub(crate) fn branch_if(
&mut self, &mut self,
cond: RegId, cond: RegId,
index: usize, label_id: LabelId,
span: Span, span: Span,
) -> Result<usize, CompileError> { ) -> Result<(), CompileError> {
self.push(Instruction::BranchIf { cond, index }.into_spanned(span)) self.push(
} Instruction::BranchIf {
cond,
/// Add a placeholder `branch-if` instruction, which must be updated with index: label_id.0,
/// [`.set_branch_target()`] }
pub(crate) fn branch_if_placeholder( .into_spanned(span),
&mut self, )
cond: RegId,
span: Span,
) -> Result<usize, CompileError> {
self.branch_if(cond, usize::MAX, span)
} }
/// Add a `branch-if-empty` instruction /// Add a `branch-if-empty` instruction
pub(crate) fn branch_if_empty( pub(crate) fn branch_if_empty(
&mut self, &mut self,
src: RegId, src: RegId,
index: usize, label_id: LabelId,
span: Span, span: Span,
) -> Result<usize, CompileError> { ) -> Result<(), CompileError> {
self.push(Instruction::BranchIfEmpty { src, index }.into_spanned(span)) self.push(
Instruction::BranchIfEmpty {
src,
index: label_id.0,
}
.into_spanned(span),
)
} }
/// Add a `jump` instruction /// Add a `jump` instruction
pub(crate) fn jump(&mut self, index: usize, span: Span) -> Result<usize, CompileError> { pub(crate) fn jump(&mut self, label_id: LabelId, span: Span) -> Result<(), CompileError> {
self.push(Instruction::Jump { index }.into_spanned(span)) self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span))
}
/// Add a placeholder `jump` instruction, which must be updated with [`.set_branch_target()`]
pub(crate) fn jump_placeholder(&mut self, span: Span) -> Result<usize, CompileError> {
self.jump(usize::MAX, span)
}
/// Modify a branching instruction's branch target `index`
#[track_caller]
pub(crate) fn set_branch_target(
&mut self,
instruction_index: usize,
target_index: usize,
) -> Result<(), CompileError> {
match self.instructions.get_mut(instruction_index) {
Some(
Instruction::BranchIf { index, .. }
| Instruction::BranchIfEmpty { index, .. }
| Instruction::Jump { index }
| Instruction::Match { index, .. }
| Instruction::Iterate {
end_index: index, ..
}
| Instruction::OnError { index }
| Instruction::OnErrorInto { index, .. },
) => {
*index = target_index;
Ok(())
}
Some(_) => {
let other = &self.instructions[instruction_index];
log::warn!("set branch target failed ({instruction_index} => {target_index}), target instruction = {other:?}, builder = {self:#?}");
Err(CompileError::SetBranchTargetOfNonBranchInstruction {
instruction: format!("{other:?}"),
span: self.spans[instruction_index],
caller: std::panic::Location::caller().to_string(),
})
}
None => Err(CompileError::InstructionIndexOutOfRange {
index: instruction_index,
}),
}
} }
/// The index that the next instruction [`.push()`]ed will have. /// The index that the next instruction [`.push()`]ed will have.
pub(crate) fn next_instruction_index(&self) -> usize { pub(crate) fn here(&self) -> usize {
self.instructions.len() self.instructions.len()
} }
@ -431,9 +429,14 @@ impl BlockBuilder {
Ok(next) Ok(next)
} }
/// Push a new loop state onto the builder. /// Push a new loop state onto the builder. Creates new labels that must be set.
pub(crate) fn begin_loop(&mut self) { pub(crate) fn begin_loop(&mut self) -> Loop {
self.loop_stack.push(LoopState::new()); let loop_ = Loop {
break_label: self.label(None),
continue_label: self.label(None),
};
self.loop_stack.push(loop_);
loop_
} }
/// True if we are currently in a loop. /// True if we are currently in a loop.
@ -442,41 +445,32 @@ impl BlockBuilder {
} }
/// Add a loop breaking jump instruction. /// Add a loop breaking jump instruction.
pub(crate) fn push_break(&mut self, span: Span) -> Result<usize, CompileError> { pub(crate) fn push_break(&mut self, span: Span) -> Result<(), CompileError> {
let index = self.jump_placeholder(span)?; let loop_ = self
self.loop_stack .loop_stack
.last_mut() .last()
.ok_or_else(|| CompileError::NotInALoop { .ok_or_else(|| CompileError::NotInALoop {
msg: "`break` called from outside of a loop".into(), msg: "`break` called from outside of a loop".into(),
span: Some(span), span: Some(span),
})? })?;
.break_branches self.jump(loop_.break_label, span)
.push(index);
Ok(index)
} }
/// Add a loop continuing jump instruction. /// Add a loop continuing jump instruction.
pub(crate) fn push_continue(&mut self, span: Span) -> Result<usize, CompileError> { pub(crate) fn push_continue(&mut self, span: Span) -> Result<(), CompileError> {
let index = self.jump_placeholder(span)?; let loop_ = self
self.loop_stack .loop_stack
.last_mut() .last()
.ok_or_else(|| CompileError::NotInALoop { .ok_or_else(|| CompileError::NotInALoop {
msg: "`continue` called from outside of a loop".into(), msg: "`continue` called from outside of a loop".into(),
span: Some(span), span: Some(span),
})? })?;
.continue_branches self.jump(loop_.continue_label, span)
.push(index);
Ok(index)
} }
/// Pop the loop state and set any `break` or `continue` instructions to their appropriate /// Pop the loop state. Checks that the loop being ended is the same one that was expected.
/// target instruction indexes. pub(crate) fn end_loop(&mut self, loop_: Loop) -> Result<(), CompileError> {
pub(crate) fn end_loop( let ended_loop = self
&mut self,
break_target_index: usize,
continue_target_index: usize,
) -> Result<(), CompileError> {
let loop_state = self
.loop_stack .loop_stack
.pop() .pop()
.ok_or_else(|| CompileError::NotInALoop { .ok_or_else(|| CompileError::NotInALoop {
@ -484,50 +478,86 @@ impl BlockBuilder {
span: None, span: None,
})?; })?;
for break_index in loop_state.break_branches { if ended_loop == loop_ {
self.set_branch_target(break_index, break_target_index)?; Ok(())
} else {
Err(CompileError::IncoherentLoopState)
} }
for continue_index in loop_state.continue_branches {
self.set_branch_target(continue_index, continue_target_index)?;
}
Ok(())
} }
/// Mark an unreachable code path. Produces an error at runtime if executed. /// Mark an unreachable code path. Produces an error at runtime if executed.
pub(crate) fn unreachable(&mut self, span: Span) -> Result<usize, CompileError> { pub(crate) fn unreachable(&mut self, span: Span) -> Result<(), CompileError> {
self.push(Instruction::Unreachable.into_spanned(span)) self.push(Instruction::Unreachable.into_spanned(span))
} }
/// Consume the builder and produce the final [`IrBlock`]. /// Consume the builder and produce the final [`IrBlock`].
pub(crate) fn finish(self) -> IrBlock { pub(crate) fn finish(mut self) -> Result<IrBlock, CompileError> {
IrBlock { // Add comments to label targets
for (index, label_target) in self.labels.iter().enumerate() {
if let Some(label_target) = label_target {
add_comment(
&mut self.comments[*label_target],
format_args!("label({index})"),
);
}
}
// Populate the actual target indices of labels into the instructions
for ((index, instruction), span) in
self.instructions.iter_mut().enumerate().zip(&self.spans)
{
if let Some(label_id) = instruction.branch_target() {
let target_index = self.labels.get(label_id).cloned().flatten().ok_or(
CompileError::UndefinedLabel {
label_id,
span: Some(*span),
},
)?;
// Add a comment to the target index that we come from here
add_comment(
&mut self.comments[target_index],
format_args!("from({index}:)"),
);
instruction.set_branch_target(target_index).map_err(|_| {
CompileError::SetBranchTargetOfNonBranchInstruction {
instruction: format!("{:?}", instruction),
span: *span,
}
})?;
}
}
Ok(IrBlock {
instructions: self.instructions, instructions: self.instructions,
spans: self.spans, spans: self.spans,
data: self.data.into(), data: self.data.into(),
ast: self.ast, ast: self.ast,
comments: self.comments.into_iter().map(|s| s.into()).collect(),
register_count: self register_count: self
.register_allocation_state .register_allocation_state
.len() .len()
.try_into() .try_into()
.expect("register count overflowed in finish() despite previous checks"), .expect("register count overflowed in finish() despite previous checks"),
file_count: self.file_count, file_count: self.file_count,
} })
} }
} }
/// Keeps track of `break` and `continue` branches that need to be set up after a loop is compiled. /// Keeps track of the `break` and `continue` target labels for a loop.
#[derive(Debug)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LoopState { pub(crate) struct Loop {
break_branches: Vec<usize>, pub(crate) break_label: LabelId,
continue_branches: Vec<usize>, pub(crate) continue_label: LabelId,
} }
impl LoopState { /// Add a new comment to an existing one
pub(crate) const fn new() -> Self { fn add_comment(comment: &mut String, new_comment: impl std::fmt::Display) {
LoopState { use std::fmt::Write;
break_branches: vec![], write!(
continue_branches: vec![], comment,
} "{}{}",
} if comment.is_empty() { "" } else { ", " },
new_comment
)
.expect("formatting failed");
} }

View File

@ -36,6 +36,10 @@ pub(crate) fn compile_if(
let true_block_id = true_block_arg.as_block().ok_or_else(invalid)?; let true_block_id = true_block_arg.as_block().ok_or_else(invalid)?;
let true_block = working_set.get_block(true_block_id); let true_block = working_set.get_block(true_block_id);
let true_label = builder.label(None);
let false_label = builder.label(None);
let end_label = builder.label(None);
let not_condition_reg = { let not_condition_reg = {
// Compile the condition first // Compile the condition first
let condition_reg = builder.next_register()?; let condition_reg = builder.next_register()?;
@ -59,10 +63,12 @@ pub(crate) fn compile_if(
condition_reg condition_reg
}; };
// Set up a branch if the condition is false. Will go back and fix this to the right offset // Set up a branch if the condition is false.
let index_of_branch_if = builder.branch_if_placeholder(not_condition_reg, call.head)?; builder.branch_if(not_condition_reg, false_label, call.head)?;
builder.add_comment("if false");
// Compile the true case // Compile the true case
builder.set_label(true_label, builder.here())?;
compile_block( compile_block(
working_set, working_set,
builder, builder,
@ -73,12 +79,11 @@ pub(crate) fn compile_if(
)?; )?;
// Add a jump over the false case // Add a jump over the false case
let index_of_jump = builder.jump_placeholder(else_arg.map(|e| e.span).unwrap_or(call.head))?; builder.jump(end_label, else_arg.map(|e| e.span).unwrap_or(call.head))?;
builder.add_comment("end if");
// Change the branch-if target to after the jump
builder.set_branch_target(index_of_branch_if, index_of_jump + 1)?;
// On the else side now, assert that io_reg is still valid // On the else side now, assert that io_reg is still valid
builder.set_label(false_label, builder.here())?;
builder.mark_register(io_reg)?; builder.mark_register(io_reg)?;
if let Some(else_arg) = else_arg { if let Some(else_arg) = else_arg {
@ -125,8 +130,8 @@ pub(crate) fn compile_if(
builder.load_empty(io_reg)?; builder.load_empty(io_reg)?;
} }
// Change the jump target to the next index (out of the if-else) // Set the end label
builder.set_branch_target(index_of_jump, builder.next_instruction_index())?; builder.set_label(end_label, builder.here())?;
Ok(()) Ok(())
} }
@ -185,34 +190,41 @@ pub(crate) fn compile_match(
builder.push(Instruction::Collect { src_dst: match_reg }.into_spanned(match_expr.span))?; builder.push(Instruction::Collect { src_dst: match_reg }.into_spanned(match_expr.span))?;
// Generate the `match` instructions. Guards are not used at this stage. // Generate the `match` instructions. Guards are not used at this stage.
let match_offset = builder.next_instruction_index(); let mut match_labels = Vec::with_capacity(match_block.len());
let mut next_labels = Vec::with_capacity(match_block.len());
let end_label = builder.label(None);
for (pattern, _) in match_block { for (pattern, _) in match_block {
let match_label = builder.label(None);
match_labels.push(match_label);
builder.push( builder.push(
Instruction::Match { Instruction::Match {
pattern: Box::new(pattern.pattern.clone()), pattern: Box::new(pattern.pattern.clone()),
src: match_reg, src: match_reg,
index: usize::MAX, // placeholder index: match_label.0,
} }
.into_spanned(pattern.span), .into_spanned(pattern.span),
)?; )?;
// Also add a label for the next match instruction or failure case
next_labels.push(builder.label(Some(builder.here())));
} }
let mut end_jumps = Vec::with_capacity(match_block.len() + 1);
// Match fall-through to jump to the end, if no match // Match fall-through to jump to the end, if no match
builder.load_empty(io_reg)?; builder.load_empty(io_reg)?;
builder.drop_reg(match_reg)?; builder.drop_reg(match_reg)?;
end_jumps.push(builder.jump_placeholder(call.head)?); builder.jump(end_label, call.head)?;
// Generate each of the match expressions. Handle guards here, if present. // Generate each of the match expressions. Handle guards here, if present.
for (index, (pattern, expr)) in match_block.iter().enumerate() { for (index, (pattern, expr)) in match_block.iter().enumerate() {
let match_label = match_labels[index];
let next_label = next_labels[index];
// `io_reg` and `match_reg` are still valid at each of these branch targets // `io_reg` and `match_reg` are still valid at each of these branch targets
builder.mark_register(io_reg)?; builder.mark_register(io_reg)?;
builder.mark_register(match_reg)?; builder.mark_register(match_reg)?;
// Set the original match instruction target here // Set the original match instruction target here
builder.set_branch_target(match_offset + index, builder.next_instruction_index())?; builder.set_label(match_label, builder.here())?;
// Handle guard, if present // Handle guard, if present
if let Some(guard) = &pattern.guard { if let Some(guard) = &pattern.guard {
@ -229,19 +241,16 @@ pub(crate) fn compile_match(
.push(Instruction::CheckMatchGuard { src: guard_reg }.into_spanned(guard.span))?; .push(Instruction::CheckMatchGuard { src: guard_reg }.into_spanned(guard.span))?;
builder.push(Instruction::Not { src_dst: guard_reg }.into_spanned(guard.span))?; builder.push(Instruction::Not { src_dst: guard_reg }.into_spanned(guard.span))?;
// Branch to the next match instruction if the branch fails to match // Branch to the next match instruction if the branch fails to match
builder.push( builder.branch_if(
Instruction::BranchIf { guard_reg,
cond: guard_reg, next_label,
index: match_offset + index + 1, // Span the branch with the next pattern, or the head if this is the end
} match_block
.into_spanned( .get(index + 1)
// Span the branch with the next pattern, or the head if this is the end .map(|b| b.0.span)
match_block .unwrap_or(call.head),
.get(index + 1)
.map(|b| b.0.span)
.unwrap_or(call.head),
),
)?; )?;
builder.add_comment("if match guard false");
} }
// match_reg no longer needed, successful match // match_reg no longer needed, successful match
@ -269,14 +278,13 @@ pub(crate) fn compile_match(
)?; )?;
} }
// Rewrite this jump to the end afterward // Jump to the end after the match logic is done
end_jumps.push(builder.jump_placeholder(call.head)?); builder.jump(end_label, call.head)?;
builder.add_comment("end match");
} }
// Rewrite the end jumps to the next instruction // Set the end destination
for index in end_jumps { builder.set_label(end_label, builder.here())?;
builder.set_branch_target(index, builder.next_instruction_index())?;
}
Ok(()) Ok(())
} }
@ -334,6 +342,7 @@ pub(crate) fn compile_let(
} }
.into_spanned(call.head), .into_spanned(call.head),
)?; )?;
builder.add_comment("let");
// Don't forget to set io_reg to Empty afterward, as that's the result of an assignment // Don't forget to set io_reg to Empty afterward, as that's the result of an assignment
builder.load_empty(io_reg)?; builder.load_empty(io_reg)?;
@ -395,6 +404,10 @@ pub(crate) fn compile_try(
}; };
let catch_span = catch_expr.map(|e| e.span).unwrap_or(call.head); let catch_span = catch_expr.map(|e| e.span).unwrap_or(call.head);
let err_label = builder.label(None);
let failed_label = builder.label(None);
let end_label = builder.label(None);
// We have two ways of executing `catch`: if it was provided as a literal, we can inline it. // We have two ways of executing `catch`: if it was provided as a literal, we can inline it.
// Otherwise, we have to evaluate the expression and keep it as a register, and then call `do`. // Otherwise, we have to evaluate the expression and keep it as a register, and then call `do`.
enum CatchType<'a> { enum CatchType<'a> {
@ -430,21 +443,23 @@ pub(crate) fn compile_try(
}) })
.transpose()?; .transpose()?;
// Put the error handler placeholder. If we have a catch expression then we should capture the // Put the error handler instruction. If we have a catch expression then we should capture the
// error. // error.
let error_handler_index = if catch_type.is_some() { if catch_type.is_some() {
builder.push( builder.push(
Instruction::OnErrorInto { Instruction::OnErrorInto {
index: usize::MAX, index: err_label.0,
dst: io_reg, dst: io_reg,
} }
.into_spanned(call.head), .into_spanned(call.head),
)? )?
} else { } else {
// Otherwise, we don't need the error value. // Otherwise, we don't need the error value.
builder.push(Instruction::OnError { index: usize::MAX }.into_spanned(call.head))? builder.push(Instruction::OnError { index: err_label.0 }.into_spanned(call.head))?
}; };
builder.add_comment("try");
// Compile the block // Compile the block
compile_block( compile_block(
working_set, working_set,
@ -464,23 +479,24 @@ pub(crate) fn compile_try(
} }
.into_spanned(catch_span), .into_spanned(catch_span),
)?; )?;
let external_failed_branch = builder.branch_if_placeholder(failed_reg, catch_span)?; builder.branch_if(failed_reg, failed_label, catch_span)?;
// Successful case: pop the error handler // Successful case: pop the error handler
builder.push(Instruction::PopErrorHandler.into_spanned(call.head))?; builder.push(Instruction::PopErrorHandler.into_spanned(call.head))?;
// Jump over the failure case // Jump over the failure case
let jump_index = builder.jump_placeholder(catch_span)?; builder.jump(end_label, catch_span)?;
// Set up an error handler preamble for failed external. // Set up an error handler preamble for failed external.
// Draining the %io_reg results in the error handler being called with Empty, and sets // Draining the %io_reg results in the error handler being called with Empty, and sets
// $env.LAST_EXIT_CODE // $env.LAST_EXIT_CODE
builder.set_branch_target(external_failed_branch, builder.next_instruction_index())?; builder.set_label(failed_label, builder.here())?;
builder.drain(io_reg, catch_span)?; builder.drain(io_reg, catch_span)?;
builder.add_comment("branches to err");
builder.unreachable(catch_span)?; builder.unreachable(catch_span)?;
// This is the real error handler - go back and set the right branch destination // This is the real error handler
builder.set_branch_target(error_handler_index, builder.next_instruction_index())?; builder.set_label(err_label, builder.here())?;
// Mark out register as likely not clean - state in error handler is not well defined // Mark out register as likely not clean - state in error handler is not well defined
builder.mark_register(io_reg)?; builder.mark_register(io_reg)?;
@ -559,7 +575,7 @@ pub(crate) fn compile_try(
} }
// This is the end - if we succeeded, should jump here // This is the end - if we succeeded, should jump here
builder.set_branch_target(jump_index, builder.next_instruction_index())?; builder.set_label(end_label, builder.here())?;
Ok(()) Ok(())
} }
@ -588,10 +604,10 @@ pub(crate) fn compile_loop(
let block_id = block_arg.as_block().ok_or_else(invalid)?; let block_id = block_arg.as_block().ok_or_else(invalid)?;
let block = working_set.get_block(block_id); let block = working_set.get_block(block_id);
builder.begin_loop(); let loop_ = builder.begin_loop();
builder.load_empty(io_reg)?; builder.load_empty(io_reg)?;
let loop_index = builder.next_instruction_index(); builder.set_label(loop_.continue_label, builder.here())?;
compile_block( compile_block(
working_set, working_set,
@ -605,10 +621,11 @@ pub(crate) fn compile_loop(
// Drain the output, just like for a semicolon // Drain the output, just like for a semicolon
builder.drain(io_reg, call.head)?; builder.drain(io_reg, call.head)?;
builder.jump(loop_index, call.head)?; builder.jump(loop_.continue_label, call.head)?;
builder.add_comment("loop");
let end_index = builder.next_instruction_index(); builder.set_label(loop_.break_label, builder.here())?;
builder.end_loop(end_index, loop_index)?; builder.end_loop(loop_)?;
// State of %io_reg is not necessarily well defined here due to control flow, so make sure it's // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's
// empty. // empty.
@ -645,9 +662,10 @@ pub(crate) fn compile_while(
let block_id = block_arg.as_block().ok_or_else(invalid)?; let block_id = block_arg.as_block().ok_or_else(invalid)?;
let block = working_set.get_block(block_id); let block = working_set.get_block(block_id);
builder.begin_loop(); let loop_ = builder.begin_loop();
builder.set_label(loop_.continue_label, builder.here())?;
let loop_index = builder.next_instruction_index(); let true_label = builder.label(None);
compile_expression( compile_expression(
working_set, working_set,
@ -658,10 +676,12 @@ pub(crate) fn compile_while(
io_reg, io_reg,
)?; )?;
let branch_true_index = builder.branch_if_placeholder(io_reg, call.head)?; builder.branch_if(io_reg, true_label, call.head)?;
let jump_false_index = builder.jump_placeholder(call.head)?; builder.add_comment("while");
builder.jump(loop_.break_label, call.head)?;
builder.add_comment("end while");
builder.set_branch_target(branch_true_index, builder.next_instruction_index())?; builder.set_label(true_label, builder.here())?;
compile_block( compile_block(
working_set, working_set,
@ -675,12 +695,11 @@ pub(crate) fn compile_while(
// Drain the result, just like for a semicolon // Drain the result, just like for a semicolon
builder.drain(io_reg, call.head)?; builder.drain(io_reg, call.head)?;
builder.jump(loop_index, call.head)?; builder.jump(loop_.continue_label, call.head)?;
builder.add_comment("while");
let end_index = builder.next_instruction_index(); builder.set_label(loop_.break_label, builder.here())?;
builder.set_branch_target(jump_false_index, end_index)?; builder.end_loop(loop_)?;
builder.end_loop(end_index, loop_index)?;
// State of %io_reg is not necessarily well defined here due to control flow, so make sure it's // State of %io_reg is not necessarily well defined here due to control flow, so make sure it's
// empty. // empty.
@ -730,9 +749,6 @@ pub(crate) fn compile_for(
// Ensure io_reg is marked so we don't use it // Ensure io_reg is marked so we don't use it
builder.mark_register(io_reg)?; builder.mark_register(io_reg)?;
// Set up loop state
builder.begin_loop();
let stream_reg = builder.next_register()?; let stream_reg = builder.next_register()?;
compile_expression( compile_expression(
@ -744,16 +760,21 @@ pub(crate) fn compile_for(
stream_reg, stream_reg,
)?; )?;
// Set up loop state
let loop_ = builder.begin_loop();
builder.set_label(loop_.continue_label, builder.here())?;
// This gets a value from the stream each time it's executed // This gets a value from the stream each time it's executed
// io_reg basically will act as our scratch register here // io_reg basically will act as our scratch register here
let iterate_index = builder.push( builder.push(
Instruction::Iterate { Instruction::Iterate {
dst: io_reg, dst: io_reg,
stream: stream_reg, stream: stream_reg,
end_index: usize::MAX, // placeholder end_index: loop_.break_label.0,
} }
.into_spanned(call.head), .into_spanned(call.head),
)?; )?;
builder.add_comment("for");
// Put the received value in the variable // Put the received value in the variable
builder.push( builder.push(
@ -778,14 +799,11 @@ pub(crate) fn compile_for(
builder.drain(io_reg, call.head)?; builder.drain(io_reg, call.head)?;
// Loop back to iterate to get the next value // Loop back to iterate to get the next value
builder.jump(iterate_index, call.head)?; builder.jump(loop_.continue_label, call.head)?;
// Update the iterate target to the end of the loop // Set the end of the loop
let end_index = builder.next_instruction_index(); builder.set_label(loop_.break_label, builder.here())?;
builder.set_branch_target(iterate_index, end_index)?; builder.end_loop(loop_)?;
// Handle break/continue
builder.end_loop(end_index, iterate_index)?;
// We don't need stream_reg anymore, after the loop // We don't need stream_reg anymore, after the loop
// io_reg may or may not be empty, so be sure it is // io_reg may or may not be empty, so be sure it is
@ -807,6 +825,7 @@ pub(crate) fn compile_break(
if builder.is_in_loop() { if builder.is_in_loop() {
builder.load_empty(io_reg)?; builder.load_empty(io_reg)?;
builder.push_break(call.head)?; builder.push_break(call.head)?;
builder.add_comment("break");
} else { } else {
// Fall back to calling the command if we can't find the loop target statically // Fall back to calling the command if we can't find the loop target statically
builder.push( builder.push(
@ -831,6 +850,7 @@ pub(crate) fn compile_continue(
if builder.is_in_loop() { if builder.is_in_loop() {
builder.load_empty(io_reg)?; builder.load_empty(io_reg)?;
builder.push_continue(call.head)?; builder.push_continue(call.head)?;
builder.add_comment("continue");
} else { } else {
// Fall back to calling the command if we can't find the loop target statically // Fall back to calling the command if we can't find the loop target statically
builder.push( builder.push(

View File

@ -39,7 +39,7 @@ pub fn compile(working_set: &StateWorkingSet, block: &Block) -> Result<IrBlock,
// A complete block has to end with a `return` // A complete block has to end with a `return`
builder.push(Instruction::Return { src: BLOCK_INPUT }.into_spanned(span))?; builder.push(Instruction::Return { src: BLOCK_INPUT }.into_spanned(span))?;
Ok(builder.finish()) builder.finish()
} }
/// Compiles a [`Block`] in-place into an IR block. This can be used in a nested manner, for example /// Compiles a [`Block`] in-place into an IR block. This can be used in a nested manner, for example

View File

@ -61,11 +61,14 @@ pub(crate) fn compile_binary_op(
match op.item { match op.item {
// `and` / `or` are short-circuiting, and we can get by with one register and a branch // `and` / `or` are short-circuiting, and we can get by with one register and a branch
Operator::Boolean(Boolean::And) => { Operator::Boolean(Boolean::And) => {
builder.branch_if(lhs_reg, builder.next_instruction_index() + 2, op.span)?; let true_label = builder.label(None);
builder.branch_if(lhs_reg, true_label, op.span)?;
// If the branch was not taken it's false, so short circuit to load false // If the branch was not taken it's false, so short circuit to load false
let jump_false = builder.jump_placeholder(op.span)?; let false_label = builder.label(None);
builder.jump(false_label, op.span)?;
builder.set_label(true_label, builder.here())?;
compile_expression( compile_expression(
working_set, working_set,
builder, builder,
@ -75,16 +78,18 @@ pub(crate) fn compile_binary_op(
lhs_reg, lhs_reg,
)?; )?;
let jump_end = builder.jump_placeholder(op.span)?; let end_label = builder.label(None);
builder.jump(end_label, op.span)?;
// Consumed by `branch-if`, so we have to set it false again // Consumed by `branch-if`, so we have to set it false again
builder.set_branch_target(jump_false, builder.next_instruction_index())?; builder.set_label(false_label, builder.here())?;
builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?; builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?;
builder.set_branch_target(jump_end, builder.next_instruction_index())?; builder.set_label(end_label, builder.here())?;
} }
Operator::Boolean(Boolean::Or) => { Operator::Boolean(Boolean::Or) => {
let branch_true = builder.branch_if_placeholder(lhs_reg, op.span)?; let true_label = builder.label(None);
builder.branch_if(lhs_reg, true_label, op.span)?;
// If the branch was not taken it's false, so do the right-side expression // If the branch was not taken it's false, so do the right-side expression
compile_expression( compile_expression(
@ -96,13 +101,14 @@ pub(crate) fn compile_binary_op(
lhs_reg, lhs_reg,
)?; )?;
let jump_end = builder.jump_placeholder(op.span)?; let end_label = builder.label(None);
builder.jump(end_label, op.span)?;
// Consumed by `branch-if`, so we have to set it true again // Consumed by `branch-if`, so we have to set it true again
builder.set_branch_target(branch_true, builder.next_instruction_index())?; builder.set_label(true_label, builder.here())?;
builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?; builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?;
builder.set_branch_target(jump_end, builder.next_instruction_index())?; builder.set_label(end_label, builder.here())?;
} }
_ => { _ => {
// Any other operator, via `binary-op` // Any other operator, via `binary-op`
@ -219,18 +225,19 @@ pub(crate) fn compile_assignment(
)?; )?;
// Default to empty record so we can do further upserts // Default to empty record so we can do further upserts
builder.branch_if_empty( let default_label = builder.label(None);
head_reg, let upsert_label = builder.label(None);
builder.next_instruction_index() + 2, builder.branch_if_empty(head_reg, default_label, assignment_span)?;
assignment_span, builder.jump(upsert_label, assignment_span)?;
)?;
builder.jump(builder.next_instruction_index() + 2, assignment_span)?; builder.set_label(default_label, builder.here())?;
builder.load_literal( builder.load_literal(
head_reg, head_reg,
Literal::Record { capacity: 0 }.into_spanned(lhs.span), Literal::Record { capacity: 0 }.into_spanned(lhs.span),
)?; )?;
// Do the upsert on the current value to incorporate rhs // Do the upsert on the current value to incorporate rhs
builder.set_label(upsert_label, builder.here())?;
compile_upsert_cell_path( compile_upsert_cell_path(
builder, builder,
(&path.tail[1..]).into_spanned(lhs.span), (&path.tail[1..]).into_spanned(lhs.span),

View File

@ -84,13 +84,12 @@ pub enum CompileError {
#[error("Attempted to set branch target of non-branch instruction.")] #[error("Attempted to set branch target of non-branch instruction.")]
#[diagnostic( #[diagnostic(
code(nu::compile::set_branch_target_of_non_branch_instruction), code(nu::compile::set_branch_target_of_non_branch_instruction),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"), help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)] )]
SetBranchTargetOfNonBranchInstruction { SetBranchTargetOfNonBranchInstruction {
instruction: String, instruction: String,
#[label("tried to modify: {instruction}")] #[label("tried to modify: {instruction}")]
span: Span, span: Span,
caller: String,
}, },
#[error("Instruction index out of range: {index}.")] #[error("Instruction index out of range: {index}.")]
@ -207,4 +206,29 @@ pub enum CompileError {
#[label("can't be used outside of a loop")] #[label("can't be used outside of a loop")]
span: Option<Span>, span: Option<Span>,
}, },
#[error("Incoherent loop state: the loop that ended was not the one we were expecting.")]
#[diagnostic(
code(nu::compile::incoherent_loop_state),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
IncoherentLoopState,
#[error("Undefined label `{label_id}`.")]
#[diagnostic(
code(nu::compile::undefined_label),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
UndefinedLabel {
label_id: usize,
#[label("label was used while compiling this code")]
span: Option<Span>,
},
#[error("Offset overflow: tried to add {offset} to {here}.")]
#[diagnostic(
code(nu::compile::offset_overflow),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
OffsetOverflow { here: usize, offset: isize },
} }

View File

@ -31,8 +31,7 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
)?; )?;
} }
for (index, instruction) in self.ir_block.instructions.iter().enumerate() { for (index, instruction) in self.ir_block.instructions.iter().enumerate() {
writeln!( let formatted = format!(
f,
"{:-4}: {}", "{:-4}: {}",
index, index,
FmtInstruction { FmtInstruction {
@ -40,7 +39,13 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
instruction, instruction,
data: &self.ir_block.data, data: &self.ir_block.data,
} }
)?; );
let comment = &self.ir_block.comments[index];
if comment.is_empty() {
writeln!(f, "{formatted}")?;
} else {
writeln!(f, "{formatted:40} # {comment}")?;
}
} }
Ok(()) Ok(())
} }

View File

@ -22,6 +22,8 @@ pub struct IrBlock {
#[serde(with = "serde_arc_u8_array")] #[serde(with = "serde_arc_u8_array")]
pub data: Arc<[u8]>, pub data: Arc<[u8]>,
pub ast: Vec<Option<IrAstRef>>, pub ast: Vec<Option<IrAstRef>>,
/// Additional information that can be added to help with debugging
pub comments: Vec<Box<str>>,
pub register_count: u32, pub register_count: u32,
pub file_count: u32, pub file_count: u32,
} }
@ -33,6 +35,7 @@ impl fmt::Debug for IrBlock {
.field("instructions", &self.instructions) .field("instructions", &self.instructions)
.field("spans", &self.spans) .field("spans", &self.spans)
.field("data", &self.data) .field("data", &self.data)
.field("comments", &self.comments)
.field("register_count", &self.register_count) .field("register_count", &self.register_count)
.field("file_count", &self.register_count) .field("file_count", &self.register_count)
.finish_non_exhaustive() .finish_non_exhaustive()
@ -272,6 +275,55 @@ impl Instruction {
data, data,
} }
} }
/// Returns the branch target index of the instruction if this is a branching instruction.
pub fn branch_target(&self) -> Option<usize> {
match self {
Instruction::Jump { index } => Some(*index),
Instruction::BranchIf { cond: _, index } => Some(*index),
Instruction::BranchIfEmpty { src: _, index } => Some(*index),
Instruction::Match {
pattern: _,
src: _,
index,
} => Some(*index),
Instruction::Iterate {
dst: _,
stream: _,
end_index,
} => Some(*end_index),
Instruction::OnError { index } => Some(*index),
Instruction::OnErrorInto { index, dst: _ } => Some(*index),
_ => None,
}
}
/// Sets the branch target of the instruction if this is a branching instruction.
///
/// Returns `Err(target_index)` if it isn't a branching instruction.
pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> {
match self {
Instruction::Jump { index } => *index = target_index,
Instruction::BranchIf { cond: _, index } => *index = target_index,
Instruction::BranchIfEmpty { src: _, index } => *index = target_index,
Instruction::Match {
pattern: _,
src: _,
index,
} => *index = target_index,
Instruction::Iterate {
dst: _,
stream: _,
end_index,
} => *end_index = target_index,
Instruction::OnError { index } => *index = target_index,
Instruction::OnErrorInto { index, dst: _ } => *index = target_index,
_ => return Err(target_index),
}
Ok(())
}
} }
// This is to document/enforce the size of `Instruction` in bytes. // This is to document/enforce the size of `Instruction` in bytes.