match support

This commit is contained in:
Devyn Cairns 2024-07-03 17:36:26 -07:00
parent 3bdb864b9d
commit 062821039a
No known key found for this signature in database
16 changed files with 407 additions and 39 deletions

1
Cargo.lock generated
View File

@ -3241,6 +3241,7 @@ dependencies = [
"convert_case",
"fancy-regex",
"indexmap",
"log",
"lru",
"miette",
"nix",

View File

@ -263,10 +263,13 @@ impl CallExt for ir::Call {
fn opt_const<T: FromValue>(
&self,
working_set: &StateWorkingSet,
pos: usize,
_working_set: &StateWorkingSet,
_pos: usize,
) -> Result<Option<T>, ShellError> {
todo!("opt_const is not yet implemented on ir::Call")
Err(ShellError::IrEvalError {
msg: "const evaluation is not yet implemented on ir::Call".into(),
span: Some(self.head),
})
}
fn req<T: FromValue>(

View File

@ -65,6 +65,7 @@ impl BlockBuilder {
}
/// Mark a register as empty, so that it can be used again by something else.
#[track_caller]
pub(crate) fn free_register(&mut self, reg_id: RegId) -> Result<(), CompileError> {
let index = reg_id.0 as usize;
@ -77,7 +78,10 @@ impl BlockBuilder {
Ok(())
} else {
log::warn!("register {reg_id} uninitialized, builder = {self:#?}");
Err(CompileError::RegisterUninitialized { reg_id })
Err(CompileError::RegisterUninitialized {
reg_id,
caller: std::panic::Location::caller().to_string(),
})
}
}
@ -85,6 +89,7 @@ impl BlockBuilder {
/// the instruction, and freeing any registers consumed by the instruction.
///
/// Returns the offset of the inserted instruction.
#[track_caller]
pub(crate) fn push(
&mut self,
instruction: Spanned<Instruction>,
@ -193,6 +198,11 @@ impl BlockBuilder {
}
Instruction::Jump { index: _ } => (),
Instruction::BranchIf { cond, index: _ } => self.free_register(*cond)?,
Instruction::Match {
pattern: _,
src: _,
index: _,
} => (),
Instruction::Iterate {
dst,
stream: _,
@ -304,6 +314,7 @@ impl BlockBuilder {
}
/// Modify a branching instruction's branch target `index`
#[track_caller]
pub(crate) fn set_branch_target(
&mut self,
instruction_index: usize,
@ -313,6 +324,7 @@ impl BlockBuilder {
Some(
Instruction::BranchIf { index, .. }
| Instruction::Jump { index }
| Instruction::Match { index, .. }
| Instruction::Iterate {
end_index: index, ..
}
@ -322,7 +334,17 @@ impl BlockBuilder {
*index = target_index;
Ok(())
}
Some(_) => Err(CompileError::SetBranchTargetOfNonBranchInstruction),
Some(_) => {
let other = &self.instructions[instruction_index];
log::warn!("set branch target failed ({instruction_index} => {target_index}), target instruction = {other:?}, builder = {self:#?}");
Err(CompileError::SetBranchTargetOfNonBranchInstruction {
instruction: format!("{other:?}"),
span: self.spans[instruction_index],
caller: std::panic::Location::caller().to_string(),
})
}
None => Err(CompileError::InstructionIndexOutOfRange {
index: instruction_index,
}),

View File

@ -21,6 +21,9 @@ pub(crate) fn compile_call(
"if" => {
return compile_if(working_set, builder, call, redirect_modes, io_reg);
}
"match" => {
return compile_match(working_set, builder, call, redirect_modes, io_reg);
}
"const" => {
// This differs from the behavior of the const command, which adds the const value
// to the stack. Since `load-variable` also checks `engine_state` for the variable

View File

@ -188,7 +188,7 @@ pub(crate) fn compile_expression(
}
Expr::Block(block_id) => lit(builder, Literal::Block(*block_id)),
Expr::Closure(block_id) => lit(builder, Literal::Closure(*block_id)),
Expr::MatchBlock(_) => Err(todo("MatchBlock")),
Expr::MatchBlock(_) => Err(unexpected("MatchBlock")), // only for `match` keyword
Expr::List(items) => {
// Guess capacity based on items (does not consider spread as more than 1)
lit(

View File

@ -131,6 +131,153 @@ pub(crate) fn compile_if(
Ok(())
}
/// Lower a call to the `match` keyword into IR instructions.
///
/// The call must have two positional arguments: the expression to match on, and a match block.
/// Anything else yields `CompileError::InvalidKeywordCall`.
///
/// Shape of the emitted code (two arms shown, the first one guarded):
///
/// ```text
///         %match_reg <- <match_expr>
///         collect %match_reg
///         match (pat1), %match_reg, PAT1
/// MATCH2: match (pat2), %match_reg, PAT2
/// FAIL:   <empty %io_reg>
///         drop %match_reg
///         jump END
/// PAT1:   %guard_reg <- <guard_expr>
///         not %guard_reg
///         branch-if %guard_reg, MATCH2
///         drop %match_reg
///         <...expr...>
///         jump END
/// PAT2:   drop %match_reg
///         <...expr...>
///         jump END
/// END:
/// ```
pub(crate) fn compile_match(
    working_set: &StateWorkingSet,
    builder: &mut BlockBuilder,
    call: &Call,
    redirect_modes: RedirectModes,
    io_reg: RegId,
) -> Result<(), CompileError> {
    let bad_call = || CompileError::InvalidKeywordCall {
        keyword: "match".into(),
        span: call.head,
    };

    let subject = call.positional_nth(0).ok_or_else(bad_call)?;
    let arms_arg = call.positional_nth(1).ok_or_else(bad_call)?;
    let arms = arms_arg.as_match_block().ok_or_else(bad_call)?;

    // The subject is evaluated once into its own register; every pattern is tested against it.
    let match_reg = builder.next_register()?;
    compile_expression(
        working_set,
        builder,
        subject,
        redirect_modes.with_capture_out(subject.span),
        None,
        match_reg,
    )?;

    // The subject has to be collected before any `match` instruction looks at it.
    let collect = Instruction::Collect { src_dst: match_reg };
    builder.push(collect.into_spanned(subject.span))?;

    // First pass: one `match` instruction per arm, in order. Their branch targets are patched
    // in the second pass, and guards play no part yet.
    let first_match = builder.next_instruction_index();
    for (arm, _) in arms {
        let test = Instruction::Match {
            pattern: Box::new(arm.pattern.clone()),
            src: match_reg,
            index: usize::MAX, // placeholder
        };
        builder.push(test.into_spanned(arm.span))?;
    }

    let mut end_jumps = Vec::with_capacity(arms.len() + 1);

    // Fall-through when no pattern matched: empty output, release the subject, skip to the end.
    builder.load_empty(io_reg)?;
    builder.drop_reg(match_reg)?;
    end_jumps.push(builder.jump_placeholder(call.head)?);

    // Second pass: emit each arm's guard (if any) and right hand side.
    for (arm_index, (arm, body)) in arms.iter().enumerate() {
        // Both registers are live again on entry to every arm, since each arm is a branch target.
        builder.mark_register(io_reg)?;
        builder.mark_register(match_reg)?;

        // The arm's `match` instruction from the first pass lands here.
        let arm_start = builder.next_instruction_index();
        builder.set_branch_target(first_match + arm_index, arm_start)?;

        if let Some(guard) = &arm.guard {
            let guard_reg = builder.next_register()?;
            compile_expression(
                working_set,
                builder,
                guard,
                redirect_modes.with_capture_out(guard.span),
                None,
                guard_reg,
            )?;
            builder.push(Instruction::Not { src_dst: guard_reg }.into_spanned(guard.span))?;

            // A rejected guard resumes at the `match` instruction of the following arm.
            // The branch is spanned with that arm's pattern, or the call head after the last arm.
            let resume_span = arms
                .get(arm_index + 1)
                .map_or(call.head, |next_arm| next_arm.0.span);
            let retry = Instruction::BranchIf {
                cond: guard_reg,
                index: first_match + arm_index + 1,
            };
            builder.push(retry.into_spanned(resume_span))?;
        }

        // The arm is committed, so the subject is no longer needed.
        builder.drop_reg(match_reg)?;

        // The right hand side is either a block or a plain expression.
        match body.as_block() {
            Some(block_id) => {
                let block = working_set.get_block(block_id);
                compile_block(
                    working_set,
                    builder,
                    block,
                    redirect_modes.clone(),
                    Some(io_reg),
                    io_reg,
                )?;
            }
            None => {
                compile_expression(
                    working_set,
                    builder,
                    body,
                    redirect_modes.clone(),
                    Some(io_reg),
                    io_reg,
                )?;
            }
        }

        // Every arm ends by jumping to the end; the target is filled in below.
        end_jumps.push(builder.jump_placeholder(call.head)?);
    }

    // Point all pending jumps at the instruction following the whole `match`.
    let end_index = builder.next_instruction_index();
    for jump_index in end_jumps {
        builder.set_branch_target(jump_index, end_index)?;
    }

    Ok(())
}
/// Compile a call to `let` or `mut` (just do store-variable)
pub(crate) fn compile_let(
working_set: &StateWorkingSet,

View File

@ -4,7 +4,7 @@ use nu_path::expand_path_with;
use nu_protocol::{
ast::{Bits, Block, Boolean, CellPath, Comparison, Math, Operator},
debugger::DebugContext,
engine::{Argument, Closure, EngineState, ErrorHandler, Redirection, Stack},
engine::{Argument, Closure, EngineState, ErrorHandler, Matcher, Redirection, Stack},
ir::{Call, DataSlice, Instruction, IrBlock, Literal, RedirectMode},
record, DeclId, IntoPipelineData, IntoSpanned, ListStream, OutDest, PipelineData, Range,
Record, RegId, ShellError, Span, Spanned, Value, VarId,
@ -37,6 +37,7 @@ pub fn eval_ir_block<D: DebugContext>(
error_handler_base,
redirect_out: None,
redirect_err: None,
matches: vec![],
registers: &mut registers[..],
},
&block_span,
@ -76,6 +77,8 @@ struct EvalContext<'a> {
redirect_out: Option<Redirection>,
/// State set by redirect-err
redirect_err: Option<Redirection>,
/// Scratch space to use for `match`
matches: Vec<(VarId, Value)>,
registers: &'a mut [PipelineData],
}
@ -508,6 +511,32 @@ fn eval_instruction(
Ok(Continue)
}
}
Instruction::Match {
    pattern,
    src,
    index,
} => {
    // Pattern matching needs a concrete value, so anything other than
    // `PipelineData::Value` in `src` is reported as an IR evaluation error.
    let data = ctx.take_reg(*src);
    let PipelineData::Value(value, metadata) = data else {
        return Err(ShellError::IrEvalError {
            msg: "must collect value before match".into(),
            span: Some(*span),
        });
    };
    ctx.matches.clear();
    let matched = pattern.match_value(&value, &mut ctx.matches);
    // Put the original value back whether or not the pattern matched. The instruction is
    // documented to preserve `src`, and a guard that fails after a successful match
    // branches to the next `match` instruction, which reads this register again.
    ctx.put_reg(*src, PipelineData::Value(value, metadata));
    if matched {
        // Match succeeded: set variables and branch
        for (var_id, match_value) in ctx.matches.drain(..) {
            ctx.stack.add_var(var_id, match_value);
        }
        Ok(Branch(*index))
    } else {
        // Failed to match: discard any partial captures and fall through
        ctx.matches.clear();
        Ok(Continue)
    }
}
Instruction::Iterate {
dst,
stream,
@ -720,15 +749,15 @@ fn eval_call(
let mut stack = stack.push_redirection(redirect_out.take(), redirect_err.take());
// should this be precalculated? ideally we just use the call builder...
let span = stack
.arguments
.get_args(*args_base, args_len)
.into_iter()
.fold(head, |span, arg| {
arg.span()
.map(|arg_span| span.append(arg_span))
.unwrap_or(span)
});
let span = Span::merge_many(
std::iter::once(head).chain(
stack
.arguments
.get_args(*args_base, args_len)
.into_iter()
.flat_map(|arg| arg.span()),
),
);
let call = Call {
decl_id,
head,

View File

@ -39,7 +39,7 @@ pub fn parse_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPatt
let value = parse_value(working_set, span, &SyntaxShape::Any);
MatchPattern {
pattern: Pattern::Value(value),
pattern: Pattern::Value(Box::new(value)),
guard: None,
span,
}

View File

@ -4415,7 +4415,7 @@ pub fn parse_match_block_expression(working_set: &mut StateWorkingSet, span: Spa
&SyntaxShape::MathExpression,
);
pattern.guard = Some(guard);
pattern.guard = Some(Box::new(guard));
position += if found { start + 1 } else { start };
connector = working_set.get_span_contents(output[position].span);
}
@ -5837,22 +5837,27 @@ pub fn parse_block(
// Do not try to compile blocks that are subexpressions, or when we've already had a parse
// failure as that definitely will fail to compile
if !is_subexpression && working_set.parse_errors.is_empty() {
match nu_engine::compile(working_set, &block) {
Ok(ir_block) => {
block.ir_block = Some(ir_block);
}
Err(err) => working_set
.parse_warnings
.push(ParseWarning::IrCompileError {
span,
errors: vec![err],
}),
}
compile_block(working_set, &mut block);
}
block
}
/// Attempt to compile `block` to IR and attach the result to it.
///
/// Compilation failure is not fatal: `block.ir_block` is left untouched and the error is
/// recorded as a `ParseWarning::IrCompileError` on the working set, spanned with the block's
/// span (or `Span::unknown()` when the block has none).
fn compile_block(working_set: &mut StateWorkingSet<'_>, block: &mut Block) {
    let compiled = nu_engine::compile(working_set, &block);
    match compiled {
        Ok(ir_block) => block.ir_block = Some(ir_block),
        Err(err) => {
            let warning = ParseWarning::IrCompileError {
                span: block.span.unwrap_or(Span::unknown()),
                errors: vec![err],
            };
            working_set.parse_warnings.push(warning);
        }
    }
}
pub fn discover_captures_in_closure(
working_set: &StateWorkingSet,
block: &Block,
@ -6295,12 +6300,14 @@ fn wrap_expr_with_collect(working_set: &mut StateWorkingSet, expr: &Expression)
default_value: None,
});
let block = Block {
let mut block = Block {
pipelines: vec![Pipeline::from_vec(vec![expr.clone()])],
signature: Box::new(signature),
..Default::default()
};
compile_block(working_set, &mut block);
let block_id = working_set.add_block(Arc::new(block));
output.push(Argument::Positional(Expression::new(

View File

@ -33,6 +33,7 @@ serde = { workspace = true, default-features = false }
thiserror = "1.0"
typetag = "0.2"
os_pipe = { workspace = true, features = ["io_safety"] }
log = { workspace = true }
[target.'cfg(unix)'.dependencies]
nix = { workspace = true, default-features = false, features = ["signal"] }
@ -54,4 +55,4 @@ tempfile = { workspace = true }
os_pipe = { workspace = true }
[package.metadata.docs.rs]
all-features = true
all-features = true

View File

@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MatchPattern {
pub pattern: Pattern,
pub guard: Option<Expression>,
pub guard: Option<Box<Expression>>,
pub span: Span,
}
@ -21,7 +21,7 @@ pub enum Pattern {
List(Vec<MatchPattern>),
// TODO: it would be nice if this didn't depend on AST
// maybe const evaluation can get us to a Value instead?
Value(Expression),
Value(Box<Expression>),
Variable(VarId),
Or(Vec<MatchPattern>),
Rest(VarId), // the ..$foo pattern

View File

@ -260,6 +260,12 @@ impl<'a> StateWorkingSet<'a> {
}
pub fn add_block(&mut self, block: Arc<Block>) -> BlockId {
log::trace!(
"block id={} added, has IR = {:?}",
self.num_blocks(),
block.ir_block.is_some()
);
self.delta.blocks.push(block);
self.num_blocks() - 1

View File

@ -17,9 +17,9 @@ pub enum CompileError {
#[error("Register {reg_id} was uninitialized when used, possibly reused.")]
#[diagnostic(
code(nu::compile::register_uninitialized),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new")
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"),
)]
RegisterUninitialized { reg_id: RegId },
RegisterUninitialized { reg_id: RegId, caller: String },
#[error("Block contains too much string data: maximum 4 GiB exceeded.")]
#[diagnostic(
@ -62,8 +62,16 @@ pub enum CompileError {
},
#[error("Attempted to set branch target of non-branch instruction.")]
#[diagnostic(code(nu::compile::set_branch_target_of_non_branch_instruction))]
SetBranchTargetOfNonBranchInstruction,
#[diagnostic(
code(nu::compile::set_branch_target_of_non_branch_instruction),
help("this is a compiler bug. Please report it at https://github.com/nushell/nushell/issues/new\nfrom: {caller}"),
)]
SetBranchTargetOfNonBranchInstruction {
instruction: String,
#[label("tried to modify: {instruction}")]
span: Span,
caller: String,
},
#[error("Instruction index out of range: {index}.")]
#[diagnostic(code(nu::compile::instruction_index_out_of_range))]

View File

@ -1,6 +1,6 @@
use std::fmt;
use crate::{engine::EngineState, DeclId, VarId};
use crate::{ast::Pattern, engine::EngineState, DeclId, VarId};
use super::{DataSlice, Instruction, IrBlock, Literal, RedirectMode};
@ -167,6 +167,17 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
Instruction::BranchIf { cond, index } => {
write!(f, "{:WIDTH$} {cond}, {index}", "branch-if")
}
Instruction::Match {
pattern,
src,
index,
} => {
let pattern = FmtPattern {
engine_state: self.engine_state,
pattern,
};
write!(f, "{:WIDTH$} ({pattern}), {src}, {index}", "match")
}
Instruction::Iterate {
dst,
stream,
@ -298,3 +309,76 @@ impl<'a> fmt::Display for FmtLiteral<'a> {
}
}
}
/// Display adapter that renders a match [`Pattern`] as text.
struct FmtPattern<'a> {
    /// Engine state used to look up source text and variables while formatting.
    engine_state: &'a EngineState,
    /// The pattern to render.
    pattern: &'a Pattern,
}
impl<'a> fmt::Display for FmtPattern<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.pattern {
Pattern::Record(bindings) => {
f.write_str("{")?;
for (name, pattern) in bindings {
write!(
f,
"{}: {}",
name,
FmtPattern {
engine_state: self.engine_state,
pattern: &pattern.pattern,
}
)?;
}
f.write_str("}")
}
Pattern::List(bindings) => {
f.write_str("[")?;
for pattern in bindings {
write!(
f,
"{}",
FmtPattern {
engine_state: self.engine_state,
pattern: &pattern.pattern
}
)?;
}
f.write_str("]")
}
Pattern::Value(expr) => {
let string =
String::from_utf8_lossy(self.engine_state.get_span_contents(expr.span));
f.write_str(&string)
}
Pattern::Variable(var_id) => {
let variable = FmtVar::new(self.engine_state, *var_id);
write!(f, "{}", variable)
}
Pattern::Or(patterns) => {
for (index, pattern) in patterns.iter().enumerate() {
if index > 0 {
f.write_str(" | ")?;
}
write!(
f,
"{}",
FmtPattern {
engine_state: self.engine_state,
pattern: &pattern.pattern
}
)?;
}
Ok(())
}
Pattern::Rest(var_id) => {
let variable = FmtVar::new(self.engine_state, *var_id);
write!(f, "..{}", variable)
}
Pattern::IgnoreRest => f.write_str(".."),
Pattern::IgnoreValue => f.write_str("_"),
Pattern::Garbage => f.write_str("<garbage>"),
}
}
}

View File

@ -1,7 +1,7 @@
use std::sync::Arc;
use crate::{
ast::{CellPath, Expression, Operator, RangeInclusion},
ast::{CellPath, Expression, Operator, Pattern, RangeInclusion},
engine::EngineState,
BlockId, DeclId, RegId, Span, VarId,
};
@ -147,6 +147,14 @@ pub enum Instruction {
/// Branch to an offset in this block if the value of the `cond` register is a true boolean,
/// otherwise continue execution
BranchIf { cond: RegId, index: usize },
/// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any
/// variables captured by the pattern. If the pattern doesn't match, continue execution. The
/// original value is preserved in `src` through this instruction.
Match {
pattern: Box<Pattern>,
src: RegId,
index: usize,
},
/// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to
/// `end_index` if the iterator is finished
Iterate {

View File

@ -276,6 +276,11 @@ fn mut_variable_append_assign() {
)
}
/// `$in` used inside a parenthesized expression should see the pipeline input.
#[test]
fn bind_in_variable_to_input() {
    let source = "3 | (4 + $in)";
    test_eval(source, Eq("7"))
}
#[test]
fn if_true() {
test_eval("if true { 'foo' }", Eq("foo"))
@ -296,6 +301,50 @@ fn if_else_false() {
test_eval("if 5 < 3 { 'foo' } else { 'bar' }", Eq("bar"))
}
/// A `match` with no arms should fall through and let the following statement run.
#[test]
fn match_empty_fallthrough() {
    let source = "match 42 { }; 'pass'";
    test_eval(source, Eq("pass"))
}
/// A literal value pattern should select the arm whose value equals the subject.
#[test]
fn match_value() {
    let source = "match 1 { 1 => 'pass', 2 => 'fail' }";
    test_eval(source, Eq("pass"))
}
/// The `_` pattern should be taken when none of the earlier value patterns match.
#[test]
fn match_value_default() {
    let source = "match 3 { 1 => 'fail1', 2 => 'fail2', _ => 'pass' }";
    test_eval(source, Eq("pass"))
}
/// When no arm matches and there is no default, the `match` should produce empty output.
#[test]
fn match_value_fallthrough() {
    let source = "match 3 { 1 => 'fail1', 2 => 'fail2' }";
    test_eval(source, Eq(""))
}
/// A variable pattern should bind the subject so the arm body can use it.
#[test]
fn match_variable() {
    let source = "match 'pass' { $s => { print $s }, _ => { print 'fail' } }";
    test_eval(source, Eq("pass"))
}
/// Variables inside a list pattern should bind to the corresponding list elements.
#[test]
fn match_variable_in_list() {
    let source = "match [fail pass] { [$f, $p] => { print $p } }";
    test_eval(source, Eq("pass"))
}
/// Pipeline input to `match` should be passed through to the body of the selected arm.
#[test]
fn match_passthrough_input() {
    let source = "'yes' | match [pass fail] { [$p, ..] => (collect { |y| $y ++ $p }) }";
    test_eval(source, Eq("yespass"))
}
#[test]
fn while_mutate_var() {
test_eval("mut x = 2; while $x > 0 { print $x; $x -= 1 }", Eq("21"))