make instruction builder use labels instead of adjusting placeholder instructions, add IR comments for debug aid

This commit is contained in:
Devyn Cairns 2024-07-09 17:08:03 -07:00
parent 23fc90a616
commit 5d083cdaee
7 changed files with 351 additions and 213 deletions

View File

@ -3,16 +3,27 @@ use nu_protocol::{
CompileError, IntoSpanned, RegId, Span, Spanned,
};
/// A label identifier. Only exists while building code. Replaced with the actual target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LabelId(pub usize);
/// Builds [`IrBlock`]s progressively by consuming instructions and handles register allocation.
#[derive(Debug)]
pub(crate) struct BlockBuilder {
pub(crate) instructions: Vec<Instruction>,
pub(crate) spans: Vec<Span>,
/// The actual instruction index that a label refers to. While building IR, branch targets are
/// specified as indices into this array rather than the true instruction index. This makes it
/// easier to make modifications to code, as just this array needs to be changed, and it's also
/// less error prone as during `finish()` we check to make sure all of the used labels have had
/// an index actually set.
pub(crate) labels: Vec<Option<usize>>,
pub(crate) data: Vec<u8>,
pub(crate) ast: Vec<Option<IrAstRef>>,
pub(crate) comments: Vec<String>,
pub(crate) register_allocation_state: Vec<bool>,
pub(crate) file_count: u32,
pub(crate) loop_stack: Vec<LoopState>,
pub(crate) loop_stack: Vec<Loop>,
}
impl BlockBuilder {
@ -21,8 +32,10 @@ impl BlockBuilder {
BlockBuilder {
instructions: vec![],
spans: vec![],
labels: vec![],
data: vec![],
ast: vec![],
comments: vec![],
register_allocation_state: vec![true],
file_count: 0,
loop_stack: vec![],
@ -91,15 +104,34 @@ impl BlockBuilder {
}
}
/// Define a label, which can be used by branch instructions. The target can optionally be
/// specified now.
pub(crate) fn label(&mut self, target_index: Option<usize>) -> LabelId {
let label_id = self.labels.len();
self.labels.push(target_index);
LabelId(label_id)
}
/// Change the target of a label.
pub(crate) fn set_label(
&mut self,
label_id: LabelId,
target_index: usize,
) -> Result<(), CompileError> {
*self
.labels
.get_mut(label_id.0)
.ok_or(CompileError::UndefinedLabel {
label_id: label_id.0,
span: None,
})? = Some(target_index);
Ok(())
}
/// Insert an instruction into the block, automatically marking any registers populated by
/// the instruction, and freeing any registers consumed by the instruction.
///
/// Returns the offset of the inserted instruction.
#[track_caller]
pub(crate) fn push(
&mut self,
instruction: Spanned<Instruction>,
) -> Result<usize, CompileError> {
pub(crate) fn push(&mut self, instruction: Spanned<Instruction>) -> Result<(), CompileError> {
// Free read registers, and mark write registers.
//
// If a register is both read and written, it should be on both sides, so that we can verify
@ -258,11 +290,11 @@ impl BlockBuilder {
Err(err) => return Err(err),
}
let index = self.next_instruction_index();
self.instructions.push(instruction.item);
self.spans.push(instruction.span);
self.ast.push(None);
Ok(index)
self.comments.push(String::new());
Ok(())
}
/// Set the AST of the last instruction. Separate method because it's rarely used.
@ -270,6 +302,14 @@ impl BlockBuilder {
*self.ast.last_mut().expect("no last instruction") = ast_ref;
}
/// Add a comment to the last instruction.
pub(crate) fn add_comment(&mut self, comment: impl std::fmt::Display) {
add_comment(
self.comments.last_mut().expect("no last instruction"),
comment,
)
}
/// Load a register with a literal.
pub(crate) fn load_literal(
&mut self,
@ -308,7 +348,7 @@ impl BlockBuilder {
}
/// Drain the stream in a register (fully consuming it)
pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<usize, CompileError> {
pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<(), CompileError> {
self.push(Instruction::Drain { src }.into_spanned(span))
}
@ -341,83 +381,41 @@ impl BlockBuilder {
pub(crate) fn branch_if(
&mut self,
cond: RegId,
index: usize,
label_id: LabelId,
span: Span,
) -> Result<usize, CompileError> {
self.push(Instruction::BranchIf { cond, index }.into_spanned(span))
}
/// Add a placeholder `branch-if` instruction, which must be updated with
/// [`.set_branch_target()`]
pub(crate) fn branch_if_placeholder(
&mut self,
cond: RegId,
span: Span,
) -> Result<usize, CompileError> {
self.branch_if(cond, usize::MAX, span)
) -> Result<(), CompileError> {
self.push(
Instruction::BranchIf {
cond,
index: label_id.0,
}
.into_spanned(span),
)
}
/// Add a `branch-if-empty` instruction
pub(crate) fn branch_if_empty(
&mut self,
src: RegId,
index: usize,
label_id: LabelId,
span: Span,
) -> Result<usize, CompileError> {
self.push(Instruction::BranchIfEmpty { src, index }.into_spanned(span))
) -> Result<(), CompileError> {
self.push(
Instruction::BranchIfEmpty {
src,
index: label_id.0,
}
.into_spanned(span),
)
}
/// Add a `jump` instruction
pub(crate) fn jump(&mut self, index: usize, span: Span) -> Result<usize, CompileError> {
self.push(Instruction::Jump { index }.into_spanned(span))
}
/// Add a placeholder `jump` instruction, which must be updated with [`.set_branch_target()`]
pub(crate) fn jump_placeholder(&mut self, span: Span) -> Result<usize, CompileError> {
self.jump(usize::MAX, span)
}
/// Modify a branching instruction's branch target `index`
#[track_caller]
pub(crate) fn set_branch_target(
&mut self,
instruction_index: usize,
target_index: usize,
) -> Result<(), CompileError> {
match self.instructions.get_mut(instruction_index) {
Some(
Instruction::BranchIf { index, .. }
| Instruction::BranchIfEmpty { index, .. }
| Instruction::Jump { index }
| Instruction::Match { index, .. }
| Instruction::Iterate {
end_index: index, ..
}
| Instruction::OnError { index }
| Instruction::OnErrorInto { index, .. },
) => {
*index = target_index;
Ok(())
}
Some(_) => {
let other = &self.instructions[instruction_index];
log::warn!("set branch target failed ({instruction_index} => {target_index}), target instruction = {other:?}, builder = {self:#?}");
Err(CompileError::SetBranchTargetOfNonBranchInstruction {
instruction: format!("{other:?}"),
span: self.spans[instruction_index],
caller: std::panic::Location::caller().to_string(),
})
}
None => Err(CompileError::InstructionIndexOutOfRange {
index: instruction_index,
}),
}
pub(crate) fn jump(&mut self, label_id: LabelId, span: Span) -> Result<(), CompileError> {
self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span))
}
/// The index that the next instruction [`.push()`]ed will have.
pub(crate) fn next_instruction_index(&self) -> usize {
pub(crate) fn here(&self) -> usize {
self.instructions.len()
}
@ -431,9 +429,14 @@ impl BlockBuilder {
Ok(next)
}
/// Push a new loop state onto the builder.
pub(crate) fn begin_loop(&mut self) {
self.loop_stack.push(LoopState::new());
/// Push a new loop state onto the builder. Creates new labels that must be set.
pub(crate) fn begin_loop(&mut self) -> Loop {
let loop_ = Loop {
break_label: self.label(None),
continue_label: self.label(None),
};
self.loop_stack.push(loop_);
loop_
}
/// True if we are currently in a loop.
@ -442,41 +445,32 @@ impl BlockBuilder {
}
/// Add a loop breaking jump instruction.
pub(crate) fn push_break(&mut self, span: Span) -> Result<usize, CompileError> {
let index = self.jump_placeholder(span)?;
self.loop_stack
.last_mut()
pub(crate) fn push_break(&mut self, span: Span) -> Result<(), CompileError> {
let loop_ = self
.loop_stack
.last()
.ok_or_else(|| CompileError::NotInALoop {
msg: "`break` called from outside of a loop".into(),
span: Some(span),
})?
.break_branches
.push(index);
Ok(index)
})?;
self.jump(loop_.break_label, span)
}
/// Add a loop continuing jump instruction.
pub(crate) fn push_continue(&mut self, span: Span) -> Result<usize, CompileError> {
let index = self.jump_placeholder(span)?;
self.loop_stack
.last_mut()
pub(crate) fn push_continue(&mut self, span: Span) -> Result<(), CompileError> {
let loop_ = self
.loop_stack
.last()
.ok_or_else(|| CompileError::NotInALoop {
msg: "`continue` called from outside of a loop".into(),
span: Some(span),
})?
.continue_branches
.push(index);
Ok(index)
})?;
self.jump(loop_.continue_label, span)
}
/// Pop the loop state and set any `break` or `continue` instructions to their appropriate
/// target instruction indexes.
pub(crate) fn end_loop(
&mut self,
break_target_index: usize,
continue_target_index: usize,
) -> Result<(), CompileError> {
let loop_state = self
/// Pop the loop state. Checks that the loop being ended is the same one that was expected.
pub(crate) fn end_loop(&mut self, loop_: Loop) -> Result<(), CompileError> {
let ended_loop = self
.loop_stack
.pop()
.ok_or_else(|| CompileError::NotInALoop {
@ -484,50 +478,86 @@ impl BlockBuilder {
span: None,
})?;
for break_index in loop_state.break_branches {
self.set_branch_target(break_index, break_target_index)?;
if ended_loop == loop_ {
Ok(())
} else {
Err(CompileError::IncoherentLoopState)
}
for continue_index in loop_state.continue_branches {
self.set_branch_target(continue_index, continue_target_index)?;
}
Ok(())
}
/// Mark an unreachable code path. Produces an error at runtime if executed.
pub(crate) fn unreachable(&mut self, span: Span) -> Result<usize, CompileError> {
pub(crate) fn unreachable(&mut self, span: Span) -> Result<(), CompileError> {
self.push(Instruction::Unreachable.into_spanned(span))
}
/// Consume the builder and produce the final [`IrBlock`].
pub(crate) fn finish(self) -> IrBlock {
IrBlock {
pub(crate) fn finish(mut self) -> Result<IrBlock, CompileError> {
// Add comments to label targets
for (index, label_target) in self.labels.iter().enumerate() {
if let Some(label_target) = label_target {
add_comment(
&mut self.comments[*label_target],
format_args!("label({index})"),
);
}
}
// Populate the actual target indices of labels into the instructions
for ((index, instruction), span) in
self.instructions.iter_mut().enumerate().zip(&self.spans)
{
if let Some(label_id) = instruction.branch_target() {
let target_index = self.labels.get(label_id).cloned().flatten().ok_or(
CompileError::UndefinedLabel {
label_id,
span: Some(*span),
},
)?;
// Add a comment to the target index that we come from here
add_comment(
&mut self.comments[target_index],
format_args!("from({index}:)"),
);
instruction.set_branch_target(target_index).map_err(|_| {
CompileError::SetBranchTargetOfNonBranchInstruction {
instruction: format!("{:?}", instruction),
span: *span,
}
})?;
}
}
Ok(IrBlock {
instructions: self.instructions,
spans: self.spans,
data: self.data.into(),
ast: self.ast,
comments: self.comments.into_iter().map(|s| s.into()).collect(),
register_count: self
.register_allocation_state
.len()
.try_into()
.expect("register count overflowed in finish() despite previous checks"),
file_count: self.file_count,
}
})
}
}
/// Keeps track of `break` and `continue` branches that need to be set up after a loop is compiled.
#[derive(Debug)]
pub(crate) struct LoopState {
break_branches: Vec<usize>,
continue_branches: Vec<usize>,
/// Keeps track of the `break` and `continue` target labels for a loop.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct Loop {
pub(crate) break_label: LabelId,
pub(crate) continue_label: LabelId,
}
impl LoopState {
pub(crate) const fn new() -> Self {
LoopState {
break_branches: vec![],
continue_branches: vec![],
}
}
/// Add a new comment to an existing one
fn add_comment(comment: &mut String, new_comment: impl std::fmt::Display) {
use std::fmt::Write;
write!(
comment,
"{}{}",
if comment.is_empty() { "" } else { ", " },
new_comment
)
.expect("formatting failed");
}

View File

@ -36,6 +36,10 @@ pub(crate) fn compile_if(
let true_block_id = true_block_arg.as_block().ok_or_else(invalid)?;
let true_block = working_set.get_block(true_block_id);
let true_label = builder.label(None);
let false_label = builder.label(None);
let end_label = builder.label(None);
let not_condition_reg = {
// Compile the condition first
let condition_reg = builder.next_register()?;
@ -59,10 +63,12 @@ pub(crate) fn compile_if(
condition_reg
};
// Set up a branch if the condition is false. Will go back and fix this to the right offset
let index_of_branch_if = builder.branch_if_placeholder(not_condition_reg, call.head)?;
// Set up a branch if the condition is false.
builder.branch_if(not_condition_reg, false_label, call.head)?;
builder.add_comment("if false");
// Compile the true case
builder.set_label(true_label, builder.here())?;
compile_block(
working_set,
builder,
@ -73,12 +79,11 @@ pub(crate) fn compile_if(
)?;
// Add a jump over the false case
let index_of_jump = builder.jump_placeholder(else_arg.map(|e| e.span).unwrap_or(call.head))?;
// Change the branch-if target to after the jump
builder.set_branch_target(index_of_branch_if, index_of_jump + 1)?;
builder.jump(end_label, else_arg.map(|e| e.span).unwrap_or(call.head))?;
builder.add_comment("end if");
// On the else side now, assert that io_reg is still valid
builder.set_label(false_label, builder.here())?;
builder.mark_register(io_reg)?;
if let Some(else_arg) = else_arg {
@ -125,8 +130,8 @@ pub(crate) fn compile_if(
builder.load_empty(io_reg)?;
}
// Change the jump target to the next index (out of the if-else)
builder.set_branch_target(index_of_jump, builder.next_instruction_index())?;
// Set the end label
builder.set_label(end_label, builder.here())?;
Ok(())
}
@ -185,34 +190,41 @@ pub(crate) fn compile_match(
builder.push(Instruction::Collect { src_dst: match_reg }.into_spanned(match_expr.span))?;
// Generate the `match` instructions. Guards are not used at this stage.
let match_offset = builder.next_instruction_index();
let mut match_labels = Vec::with_capacity(match_block.len());
let mut next_labels = Vec::with_capacity(match_block.len());
let end_label = builder.label(None);
for (pattern, _) in match_block {
let match_label = builder.label(None);
match_labels.push(match_label);
builder.push(
Instruction::Match {
pattern: Box::new(pattern.pattern.clone()),
src: match_reg,
index: usize::MAX, // placeholder
index: match_label.0,
}
.into_spanned(pattern.span),
)?;
// Also add a label for the next match instruction or failure case
next_labels.push(builder.label(Some(builder.here())));
}
let mut end_jumps = Vec::with_capacity(match_block.len() + 1);
// Match fall-through to jump to the end, if no match
builder.load_empty(io_reg)?;
builder.drop_reg(match_reg)?;
end_jumps.push(builder.jump_placeholder(call.head)?);
builder.jump(end_label, call.head)?;
// Generate each of the match expressions. Handle guards here, if present.
for (index, (pattern, expr)) in match_block.iter().enumerate() {
let match_label = match_labels[index];
let next_label = next_labels[index];
// `io_reg` and `match_reg` are still valid at each of these branch targets
builder.mark_register(io_reg)?;
builder.mark_register(match_reg)?;
// Set the original match instruction target here
builder.set_branch_target(match_offset + index, builder.next_instruction_index())?;
builder.set_label(match_label, builder.here())?;
// Handle guard, if present
if let Some(guard) = &pattern.guard {
@ -229,19 +241,16 @@ pub(crate) fn compile_match(
.push(Instruction::CheckMatchGuard { src: guard_reg }.into_spanned(guard.span))?;
builder.push(Instruction::Not { src_dst: guard_reg }.into_spanned(guard.span))?;
// Branch to the next match instruction if the branch fails to match
builder.push(
Instruction::BranchIf {
cond: guard_reg,
index: match_offset + index + 1,
}
.into_spanned(
// Span the branch with the next pattern, or the head if this is the end
match_block
.get(index + 1)
.map(|b| b.0.span)
.unwrap_or(call.head),
),
builder.branch_if(
guard_reg,
next_label,
// Span the branch with the next pattern, or the head if this is the end
match_block
.get(index + 1)
.map(|b| b.0.span)
.unwrap_or(call.head),
)?;
builder.add_comment("if match guard false");
}
// match_reg no longer needed, successful match
@ -269,14 +278,13 @@ pub(crate) fn compile_match(
)?;
}
// Rewrite this jump to the end afterward
end_jumps.push(builder.jump_placeholder(call.head)?);
// Jump to the end after the match logic is done
builder.jump(end_label, call.head)?;
builder.add_comment("end match");
}
// Rewrite the end jumps to the next instruction
for index in end_jumps {
builder.set_branch_target(index, builder.next_instruction_index())?;
}
// Set the end destination
builder.set_label(end_label, builder.here())?;
Ok(())
}
@ -334,6 +342,7 @@ pub(crate) fn compile_let(
}
.into_spanned(call.head),
)?;
builder.add_comment("let");
// Don't forget to set io_reg to Empty afterward, as that's the result of an assignment
builder.load_empty(io_reg)?;
@ -395,6 +404,10 @@ pub(crate) fn compile_try(
};
let catch_span = catch_expr.map(|e| e.span).unwrap_or(call.head);
let err_label = builder.label(None);
let failed_label = builder.label(None);
let end_label = builder.label(None);
// We have two ways of executing `catch`: if it was provided as a literal, we can inline it.
// Otherwise, we have to evaluate the expression and keep it as a register, and then call `do`.
enum CatchType<'a> {
@ -430,21 +443,23 @@ pub(crate) fn compile_try(
})
.transpose()?;
// Put the error handler placeholder. If we have a catch expression then we should capture the
// Put the error handler instruction. If we have a catch expression then we should capture the
// error.
let error_handler_index = if catch_type.is_some() {
if catch_type.is_some() {
builder.push(
Instruction::OnErrorInto {
index: usize::MAX,
index: err_label.0,
dst: io_reg,
}
.into_spanned(call.head),
)?
} else {
// Otherwise, we don't need the error value.
builder.push(Instruction::OnError { index: usize::MAX }.into_spanned(call.head))?
builder.push(Instruction::OnError { index: err_label.0 }.into_spanned(call.head))?
};
builder.add_comment("try");
// Compile the block
compile_block(
working_set,
@ -464,23 +479,24 @@ pub(crate) fn compile_try(
}
.into_spanned(catch_span),
)?;
let external_failed_branch = builder.branch_if_placeholder(failed_reg, catch_span)?;
builder.branch_if(failed_reg, failed_label, catch_span)?;
// Successful case: pop the error handler
builder.push(Instruction::PopErrorHandler.into_spanned(call.head))?;
// Jump over the failure case
let jump_index = builder.jump_placeholder(catch_span)?;
builder.jump(end_label, catch_span)?;
// Set up an error handler preamble for failed external.
// Draining the %io_reg results in the error handler being called with Empty, and sets
// $env.LAST_EXIT_CODE
builder.set_branch_target(external_failed_branch, builder.next_instruction_index())?;
builder.set_label(failed_label, builder.here())?;
builder.drain(io_reg, catch_span)?;
builder.add_comment("branches to err");
builder.unreachable(catch_span)?;
// This is the real error handler - go back and set the right branch destination
builder.set_branch_target(error_handler_index, builder.next_instruction_index())?;
// This is the real error handler
builder.set_label(err_label, builder.here())?;
// Mark out register as likely not clean - state in error handler is not well defined
builder.mark_register(io_reg)?;
@ -559,7 +575,7 @@ pub(crate) fn compile_try(
}
// This is the end - if we succeeded, should jump here
builder.set_branch_target(jump_index, builder.next_instruction_index())?;
builder.set_label(end_label, builder.here())?;
Ok(())
}
@ -588,10 +604,10 @@ pub(crate) fn compile_loop(
let block_id = block_arg.as_block().ok_or_else(invalid)?;
let block = working_set.get_block(block_id);
builder.begin_loop();
let loop_ = builder.begin_loop();
builder.load_empty(io_reg)?;
let loop_index = builder.next_instruction_index();
builder.set_label(loop_.continue_label, builder.here())?;
compile_block(
working_set,
@ -605,10 +621,11 @@ pub(crate) fn compile_loop(
// Drain the output, just like for a semicolon
builder.drain(io_reg, call.head)?;
builder.jump(loop_index, call.head)?;
builder.jump(loop_.continue_label, call.head)?;
builder.add_comment("loop");
let end_index = builder.next_instruction_index();
builder.end_loop(end_index, loop_index)?;
builder.set_label(loop_.break_label, builder.here())?;
builder.end_loop(loop_)?;
// State of %io_reg is not necessarily well defined here due to control flow, so make sure it's
// empty.
@ -645,9 +662,10 @@ pub(crate) fn compile_while(
let block_id = block_arg.as_block().ok_or_else(invalid)?;
let block = working_set.get_block(block_id);
builder.begin_loop();
let loop_ = builder.begin_loop();
builder.set_label(loop_.continue_label, builder.here())?;
let loop_index = builder.next_instruction_index();
let true_label = builder.label(None);
compile_expression(
working_set,
@ -658,10 +676,12 @@ pub(crate) fn compile_while(
io_reg,
)?;
let branch_true_index = builder.branch_if_placeholder(io_reg, call.head)?;
let jump_false_index = builder.jump_placeholder(call.head)?;
builder.branch_if(io_reg, true_label, call.head)?;
builder.add_comment("while");
builder.jump(loop_.break_label, call.head)?;
builder.add_comment("end while");
builder.set_branch_target(branch_true_index, builder.next_instruction_index())?;
builder.set_label(true_label, builder.here())?;
compile_block(
working_set,
@ -675,12 +695,11 @@ pub(crate) fn compile_while(
// Drain the result, just like for a semicolon
builder.drain(io_reg, call.head)?;
builder.jump(loop_index, call.head)?;
builder.jump(loop_.continue_label, call.head)?;
builder.add_comment("while");
let end_index = builder.next_instruction_index();
builder.set_branch_target(jump_false_index, end_index)?;
builder.end_loop(end_index, loop_index)?;
builder.set_label(loop_.break_label, builder.here())?;
builder.end_loop(loop_)?;
// State of %io_reg is not necessarily well defined here due to control flow, so make sure it's
// empty.
@ -730,9 +749,6 @@ pub(crate) fn compile_for(
// Ensure io_reg is marked so we don't use it
builder.mark_register(io_reg)?;
// Set up loop state
builder.begin_loop();
let stream_reg = builder.next_register()?;
compile_expression(
@ -744,16 +760,21 @@ pub(crate) fn compile_for(
stream_reg,
)?;
// Set up loop state
let loop_ = builder.begin_loop();
builder.set_label(loop_.continue_label, builder.here())?;
// This gets a value from the stream each time it's executed
// io_reg basically will act as our scratch register here
let iterate_index = builder.push(
builder.push(
Instruction::Iterate {
dst: io_reg,
stream: stream_reg,
end_index: usize::MAX, // placeholder
end_index: loop_.break_label.0,
}
.into_spanned(call.head),
)?;
builder.add_comment("for");
// Put the received value in the variable
builder.push(
@ -778,14 +799,11 @@ pub(crate) fn compile_for(
builder.drain(io_reg, call.head)?;
// Loop back to iterate to get the next value
builder.jump(iterate_index, call.head)?;
builder.jump(loop_.continue_label, call.head)?;
// Update the iterate target to the end of the loop
let end_index = builder.next_instruction_index();
builder.set_branch_target(iterate_index, end_index)?;
// Handle break/continue
builder.end_loop(end_index, iterate_index)?;
// Set the end of the loop
builder.set_label(loop_.break_label, builder.here())?;
builder.end_loop(loop_)?;
// We don't need stream_reg anymore, after the loop
// io_reg may or may not be empty, so be sure it is
@ -807,6 +825,7 @@ pub(crate) fn compile_break(
if builder.is_in_loop() {
builder.load_empty(io_reg)?;
builder.push_break(call.head)?;
builder.add_comment("break");
} else {
// Fall back to calling the command if we can't find the loop target statically
builder.push(
@ -831,6 +850,7 @@ pub(crate) fn compile_continue(
if builder.is_in_loop() {
builder.load_empty(io_reg)?;
builder.push_continue(call.head)?;
builder.add_comment("continue");
} else {
// Fall back to calling the command if we can't find the loop target statically
builder.push(

View File

@ -39,7 +39,7 @@ pub fn compile(working_set: &StateWorkingSet, block: &Block) -> Result<IrBlock,
// A complete block has to end with a `return`
builder.push(Instruction::Return { src: BLOCK_INPUT }.into_spanned(span))?;
Ok(builder.finish())
builder.finish()
}
/// Compiles a [`Block`] in-place into an IR block. This can be used in a nested manner, for example

View File

@ -61,11 +61,14 @@ pub(crate) fn compile_binary_op(
match op.item {
// `and` / `or` are short-circuiting, and we can get by with one register and a branch
Operator::Boolean(Boolean::And) => {
builder.branch_if(lhs_reg, builder.next_instruction_index() + 2, op.span)?;
let true_label = builder.label(None);
builder.branch_if(lhs_reg, true_label, op.span)?;
// If the branch was not taken it's false, so short circuit to load false
let jump_false = builder.jump_placeholder(op.span)?;
let false_label = builder.label(None);
builder.jump(false_label, op.span)?;
builder.set_label(true_label, builder.here())?;
compile_expression(
working_set,
builder,
@ -75,16 +78,18 @@ pub(crate) fn compile_binary_op(
lhs_reg,
)?;
let jump_end = builder.jump_placeholder(op.span)?;
let end_label = builder.label(None);
builder.jump(end_label, op.span)?;
// Consumed by `branch-if`, so we have to set it false again
builder.set_branch_target(jump_false, builder.next_instruction_index())?;
builder.set_label(false_label, builder.here())?;
builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?;
builder.set_branch_target(jump_end, builder.next_instruction_index())?;
builder.set_label(end_label, builder.here())?;
}
Operator::Boolean(Boolean::Or) => {
let branch_true = builder.branch_if_placeholder(lhs_reg, op.span)?;
let true_label = builder.label(None);
builder.branch_if(lhs_reg, true_label, op.span)?;
// If the branch was not taken it's false, so do the right-side expression
compile_expression(
@ -96,13 +101,14 @@ pub(crate) fn compile_binary_op(
lhs_reg,
)?;
let jump_end = builder.jump_placeholder(op.span)?;
let end_label = builder.label(None);
builder.jump(end_label, op.span)?;
// Consumed by `branch-if`, so we have to set it true again
builder.set_branch_target(branch_true, builder.next_instruction_index())?;
builder.set_label(true_label, builder.here())?;
builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?;
builder.set_branch_target(jump_end, builder.next_instruction_index())?;
builder.set_label(end_label, builder.here())?;
}
_ => {
// Any other operator, via `binary-op`
@ -219,18 +225,19 @@ pub(crate) fn compile_assignment(
)?;
// Default to empty record so we can do further upserts
builder.branch_if_empty(
head_reg,
builder.next_instruction_index() + 2,
assignment_span,
)?;
builder.jump(builder.next_instruction_index() + 2, assignment_span)?;
let default_label = builder.label(None);
let upsert_label = builder.label(None);
builder.branch_if_empty(head_reg, default_label, assignment_span)?;
builder.jump(upsert_label, assignment_span)?;
builder.set_label(default_label, builder.here())?;
builder.load_literal(
head_reg,
Literal::Record { capacity: 0 }.into_spanned(lhs.span),
)?;
// Do the upsert on the current value to incorporate rhs
builder.set_label(upsert_label, builder.here())?;
compile_upsert_cell_path(
builder,
(&path.tail[1..]).into_spanned(lhs.span),

View File

@ -84,13 +84,12 @@ pub enum CompileError {
#[error("Attempted to set branch target of non-branch instruction.")]
#[diagnostic(
code(nu::compile::set_branch_target_of_non_branch_instruction),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
SetBranchTargetOfNonBranchInstruction {
instruction: String,
#[label("tried to modify: {instruction}")]
span: Span,
caller: String,
},
#[error("Instruction index out of range: {index}.")]
@ -207,4 +206,29 @@ pub enum CompileError {
#[label("can't be used outside of a loop")]
span: Option<Span>,
},
#[error("Incoherent loop state: the loop that ended was not the one we were expecting.")]
#[diagnostic(
code(nu::compile::incoherent_loop_state),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
IncoherentLoopState,
#[error("Undefined label `{label_id}`.")]
#[diagnostic(
code(nu::compile::undefined_label),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
UndefinedLabel {
label_id: usize,
#[label("label was used while compiling this code")]
span: Option<Span>,
},
#[error("Offset overflow: tried to add {offset} to {here}.")]
#[diagnostic(
code(nu::compile::offset_overflow),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new"),
)]
OffsetOverflow { here: usize, offset: isize },
}

View File

@ -31,8 +31,7 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
)?;
}
for (index, instruction) in self.ir_block.instructions.iter().enumerate() {
writeln!(
f,
let formatted = format!(
"{:-4}: {}",
index,
FmtInstruction {
@ -40,7 +39,13 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
instruction,
data: &self.ir_block.data,
}
)?;
);
let comment = &self.ir_block.comments[index];
if comment.is_empty() {
writeln!(f, "{formatted}")?;
} else {
writeln!(f, "{formatted:40} # {comment}")?;
}
}
Ok(())
}

View File

@ -22,6 +22,8 @@ pub struct IrBlock {
#[serde(with = "serde_arc_u8_array")]
pub data: Arc<[u8]>,
pub ast: Vec<Option<IrAstRef>>,
/// Additional information that can be added to help with debugging
pub comments: Vec<Box<str>>,
pub register_count: u32,
pub file_count: u32,
}
@ -33,6 +35,7 @@ impl fmt::Debug for IrBlock {
.field("instructions", &self.instructions)
.field("spans", &self.spans)
.field("data", &self.data)
.field("comments", &self.comments)
.field("register_count", &self.register_count)
.field("file_count", &self.register_count)
.finish_non_exhaustive()
@ -272,6 +275,55 @@ impl Instruction {
data,
}
}
/// Returns the branch target index of the instruction if this is a branching instruction.
pub fn branch_target(&self) -> Option<usize> {
match self {
Instruction::Jump { index } => Some(*index),
Instruction::BranchIf { cond: _, index } => Some(*index),
Instruction::BranchIfEmpty { src: _, index } => Some(*index),
Instruction::Match {
pattern: _,
src: _,
index,
} => Some(*index),
Instruction::Iterate {
dst: _,
stream: _,
end_index,
} => Some(*end_index),
Instruction::OnError { index } => Some(*index),
Instruction::OnErrorInto { index, dst: _ } => Some(*index),
_ => None,
}
}
/// Sets the branch target of the instruction if this is a branching instruction.
///
/// Returns `Err(target_index)` if it isn't a branching instruction.
pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> {
match self {
Instruction::Jump { index } => *index = target_index,
Instruction::BranchIf { cond: _, index } => *index = target_index,
Instruction::BranchIfEmpty { src: _, index } => *index = target_index,
Instruction::Match {
pattern: _,
src: _,
index,
} => *index = target_index,
Instruction::Iterate {
dst: _,
stream: _,
end_index,
} => *end_index = target_index,
Instruction::OnError { index } => *index = target_index,
Instruction::OnErrorInto { index, dst: _ } => *index = target_index,
_ => return Err(target_index),
}
Ok(())
}
}
// This is to document/enforce the size of `Instruction` in bytes.