abandon collect as keyword command, but change $in to use a new Collect expr type instead

This commit is contained in:
Devyn Cairns 2024-07-11 17:27:26 -07:00
parent e982c12a2f
commit 59b217443a
20 changed files with 185 additions and 149 deletions

View File

@ -429,6 +429,14 @@ fn find_matching_block_end_in_expr(
)
}),
Expr::Collect(_, expr) => find_matching_block_end_in_expr(
line,
working_set,
expr,
global_span_offset,
global_cursor_offset,
),
Expr::Block(block_id)
| Expr::Closure(block_id)
| Expr::RowCondition(block_id)

View File

@ -1,5 +1,5 @@
use nu_engine::{command_prelude::*, get_eval_block};
use nu_protocol::{engine::CommandType, DataSource, PipelineMetadata};
use nu_engine::{command_prelude::*, get_eval_block, redirect_env};
use nu_protocol::{engine::Closure, DataSource, PipelineMetadata};
#[derive(Clone)]
pub struct Collect;
@ -13,15 +13,16 @@ impl Command for Collect {
Signature::build("collect")
.input_output_types(vec![(Type::Any, Type::Any)])
.optional(
"block",
SyntaxShape::Block,
"The block to run once the stream is collected.",
"closure",
SyntaxShape::Closure(Some(vec![SyntaxShape::Any])),
"The closure to run once the stream is collected.",
)
.category(Category::Core)
}
fn command_type(&self) -> CommandType {
CommandType::Keyword
.switch(
"keep-env",
"let the closure affect environment variables",
None,
)
.category(Category::Filters)
}
fn usage(&self) -> &str {
@ -29,7 +30,7 @@ impl Command for Collect {
}
fn extra_usage(&self) -> &str {
r#"If provided, run a block with the collected value as input.
r#"If provided, run a closure with the collected value as input.
The entire stream will be collected into one value in memory, so if the stream
is particularly large, this can cause high memory usage."#
@ -42,12 +43,7 @@ is particularly large, this can cause high memory usage."#
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
// This is compiled specially by the IR compiler. The code here is never used when
// running in IR mode.
let call = call.assert_ast_call()?;
let block_id = call
.positional_nth(0)
.map(|expr| expr.as_block().expect("checked through parser"));
let closure: Option<Closure> = call.opt(engine_state, stack, 0)?;
let metadata = match input.metadata() {
// Remove the `FilePath` metadata, because after `collect` it's no longer necessary to
@ -62,21 +58,40 @@ is particularly large, this can cause high memory usage."#
let input = input.into_value(call.head)?;
let result;
if let Some(block_id) = block_id {
let block = engine_state.get_block(block_id);
if let Some(closure) = closure {
let block = engine_state.get_block(closure.block_id);
let mut stack_captures =
stack.captures_to_stack_preserve_out_dest(closure.captures.clone());
let mut saved_positional = None;
if let Some(var) = block.signature.get_positional(0) {
if let Some(var_id) = &var.var_id {
stack_captures.add_var(*var_id, input.clone());
saved_positional = Some(*var_id);
}
}
let eval_block = get_eval_block(engine_state);
if let Some(var_id) = block.signature.get_positional(0).and_then(|var| var.var_id) {
stack.add_var(var_id, input);
result = eval_block(engine_state, stack, block, PipelineData::Empty);
stack.remove_var(var_id);
} else {
result = eval_block(
engine_state,
stack,
block,
input.into_pipeline_data_with_metadata(metadata),
);
result = eval_block(
engine_state,
&mut stack_captures,
block,
input.into_pipeline_data_with_metadata(metadata),
);
if call.has_flag(engine_state, stack, "keep-env")? {
redirect_env(engine_state, stack, &stack_captures);
// for when we support `data | let x = $in;`
// remove the variables added earlier
for (var_id, _) in closure.captures {
stack_captures.remove_var(var_id);
}
if let Some(u) = saved_positional {
stack_captures.remove_var(u);
}
// add any new variables to the stack
stack.vars.extend(stack_captures.vars);
}
} else {
result = Ok(input.into_pipeline_data_with_metadata(metadata));

View File

@ -204,6 +204,7 @@ impl BlockBuilder {
Instruction::Drain { src } => allocate(&[*src], &[]),
Instruction::LoadVariable { dst, var_id: _ } => allocate(&[], &[*dst]),
Instruction::StoreVariable { var_id: _, src } => allocate(&[*src], &[]),
Instruction::DropVariable { var_id: _ } => Ok(()),
Instruction::LoadEnv { dst, key: _ } => allocate(&[], &[*dst]),
Instruction::LoadEnvOpt { dst, key: _ } => allocate(&[], &[*dst]),
Instruction::StoreEnv { key: _, src } => allocate(&[*src], &[]),

View File

@ -50,9 +50,6 @@ pub(crate) fn compile_call(
"let" | "mut" => {
return compile_let(working_set, builder, call, redirect_modes, io_reg);
}
"collect" => {
return compile_collect(working_set, builder, call, redirect_modes, io_reg);
}
"try" => {
return compile_try(working_set, builder, call, redirect_modes, io_reg);
}

View File

@ -171,6 +171,28 @@ pub(crate) fn compile_expression(
Err(CompileError::UnsupportedOperatorExpression { span: op.span })
}
}
Expr::Collect(var_id, expr) => {
let in_reg = match in_reg {
Some(in_reg) => in_reg,
None => {
let reg_id = builder.next_register()?;
builder.load_empty(reg_id)?;
reg_id
}
};
// Implicit collect
builder.push(
Instruction::StoreVariable {
var_id: *var_id,
src: in_reg,
}
.into_spanned(expr.span),
)?;
compile_expression(working_set, builder, expr, redirect_modes, None, out_reg)?;
// Clean it up afterward
builder.push(Instruction::DropVariable { var_id: *var_id }.into_spanned(expr.span))?;
Ok(())
}
Expr::Subexpression(block_id) => {
let block = working_set.get_block(*block_id);
compile_block(working_set, builder, block, redirect_modes, in_reg, out_reg)

View File

@ -350,63 +350,6 @@ pub(crate) fn compile_let(
Ok(())
}
/// Compile a call to `collect`
pub(crate) fn compile_collect(
working_set: &StateWorkingSet,
builder: &mut BlockBuilder,
call: &Call,
redirect_modes: RedirectModes,
io_reg: RegId,
) -> Result<(), CompileError> {
let block_id = call
.positional_nth(0)
.map(|expr| {
expr.as_block().ok_or(CompileError::UnexpectedExpression {
expr_name: format!("{:?}", expr),
span: expr.span,
})
})
.transpose()?;
if let Some(block_id) = block_id {
let block = working_set.get_block(block_id);
if let Some(var_id) = block.signature.get_positional(0).and_then(|var| var.var_id) {
// Pseudocode:
//
// store-variable $var, %io_reg
// ...<block>...
builder.push(
Instruction::StoreVariable {
var_id,
src: io_reg,
}
.into_spanned(block.span.unwrap_or(call.head)),
)?;
compile_block(working_set, builder, block, redirect_modes, None, io_reg)
} else {
// Pseudocode:
//
// collect %io_reg
// ...<block>...
builder.push(Instruction::Collect { src_dst: io_reg }.into_spanned(call.head))?;
compile_block(
working_set,
builder,
block,
redirect_modes,
Some(io_reg),
io_reg,
)
}
} else {
// Pseudocode:
//
// collect %io_reg
builder.push(Instruction::Collect { src_dst: io_reg }.into_spanned(call.head))
}
}
/// Compile a call to `try`, setting an error handler over the evaluated block
pub(crate) fn compile_try(
working_set: &StateWorkingSet,

View File

@ -259,6 +259,10 @@ pub fn eval_expression_with_input<D: DebugContext>(
input = eval_external(engine_state, stack, head, args, input)?;
}
Expr::Collect(var_id, expr) => {
input = eval_collect::<D>(engine_state, stack, *var_id, expr, input)?;
}
Expr::Subexpression(block_id) => {
let block = engine_state.get_block(*block_id);
// FIXME: protect this collect with ctrl-c
@ -605,6 +609,26 @@ pub fn eval_block<D: DebugContext>(
Ok(input)
}
pub fn eval_collect<D: DebugContext>(
engine_state: &EngineState,
stack: &mut Stack,
var_id: VarId,
expr: &Expression,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
// Evaluate the expression with the variable set to the collected input
let span = input.span().unwrap_or(Span::unknown());
stack.add_var(var_id, input.into_value(span)?);
let result = eval_expression_with_input::<D>(engine_state, stack, expr, PipelineData::empty())
.map(|(result, _failed)| result);
stack.remove_var(var_id);
result
}
pub fn eval_subexpression<D: DebugContext>(
engine_state: &EngineState,
stack: &mut Stack,
@ -729,6 +753,18 @@ impl Eval for EvalRuntime {
eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span)
}
fn eval_collect<D: DebugContext>(
engine_state: &EngineState,
stack: &mut Stack,
var_id: VarId,
expr: &Expression,
) -> Result<Value, ShellError> {
// It's a little bizarre, but the expression can still have some kind of result even with
// nothing input
eval_collect::<D>(engine_state, stack, var_id, expr, PipelineData::empty())?
.into_value(expr.span)
}
fn eval_subexpression<D: DebugContext>(
engine_state: &EngineState,
stack: &mut Stack,

View File

@ -336,6 +336,10 @@ fn eval_instruction<D: DebugContext>(
ctx.stack.add_var(*var_id, value);
Ok(Continue)
}
Instruction::DropVariable { var_id } => {
ctx.stack.remove_var(*var_id);
Ok(Continue)
}
Instruction::LoadEnv { dst, key } => {
let key = ctx.get_str(*key, *span)?;
if let Some(value) = get_env_var_case_insensitive(ctx, key) {

View File

@ -189,6 +189,9 @@ fn flatten_expression_into(
));
flatten_expression_into(working_set, not, output);
}
Expr::Collect(_, expr) => {
flatten_expression_into(working_set, expr, output);
}
Expr::Closure(block_id) => {
let outer_span = expr.span;

View File

@ -6220,6 +6220,10 @@ pub fn discover_captures_in_expr(
discover_captures_in_expr(working_set, &match_.1, seen, seen_blocks, output)?;
}
}
Expr::Collect(var_id, expr) => {
seen.push(*var_id);
discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?
}
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
let block = working_set.get_block(*block_id);
@ -6305,58 +6309,20 @@ fn wrap_element_with_collect(
fn wrap_expr_with_collect(working_set: &mut StateWorkingSet, expr: &Expression) -> Expression {
let span = expr.span;
if let Some(decl_id) = working_set.find_decl(b"collect") {
let mut output = vec![];
// IN_VARIABLE_ID should get replaced with a unique variable, so that we don't have to
// execute as a closure
let var_id = working_set.add_variable(b"$in".into(), expr.span, Type::Any, false);
let mut expr = expr.clone();
expr.replace_in_variable(working_set, var_id);
// IN_VARIABLE_ID should get replaced with a unique variable, so that we don't have to
// execute as a closure
let var_id = working_set.add_variable(b"$in".into(), expr.span, Type::Any, false);
let mut expr = expr.clone();
expr.replace_in_variable(working_set, var_id);
let mut signature = Signature::new("");
signature.required_positional.push(PositionalArg {
var_id: Some(var_id),
name: "$in".into(),
desc: String::new(),
shape: SyntaxShape::Any,
default_value: None,
});
let mut block = Block {
pipelines: vec![Pipeline::from_vec(vec![expr.clone()])],
signature: Box::new(signature),
..Default::default()
};
compile_block(working_set, &mut block);
let block_id = working_set.add_block(Arc::new(block));
output.push(Argument::Positional(Expression::new(
working_set,
Expr::Closure(block_id),
span,
Type::Any,
)));
// The containing, synthetic call to `collect`.
// We don't want to have a real span as it will confuse flattening
// The args are where we'll get the real info
Expression::new(
working_set,
Expr::Call(Box::new(Call {
head: Span::new(0, 0),
arguments: output,
decl_id,
parser_info: HashMap::new(),
})),
span,
Type::Any,
)
} else {
Expression::garbage(working_set, span)
}
// Bind the custom `$in` variable for that particular expression
Expression::new(
working_set,
Expr::Collect(var_id, Box::new(expr.clone())),
span,
// We can expect it to have the same result type
expr.ty,
)
}
// Parses a vector of u8 to create an AST Block. If a file name is given, then

View File

@ -83,7 +83,7 @@ impl Block {
pub fn replace_in_variable(
&mut self,
working_set: &mut StateWorkingSet<'_>,
new_var_id: usize,
new_var_id: VarId,
) {
for pipeline in self.pipelines.iter_mut() {
if let Some(element) = pipeline.elements.first_mut() {

View File

@ -25,6 +25,7 @@ pub enum Expr {
RowCondition(BlockId),
UnaryNot(Box<Expression>),
BinaryOp(Box<Expression>, Box<Expression>, Box<Expression>), //lhs, op, rhs
Collect(VarId, Box<Expression>),
Subexpression(BlockId),
Block(BlockId),
Closure(BlockId),
@ -65,11 +66,13 @@ impl Expr {
&self,
working_set: &StateWorkingSet,
) -> (Option<OutDest>, Option<OutDest>) {
// Usages of `$in` will be wrapped by a `collect` call by the parser,
// so we do not have to worry about that when considering
// which of the expressions below may consume pipeline output.
match self {
Expr::Call(call) => working_set.get_decl(call.decl_id).pipe_redirection(),
Expr::Collect(_, _) => {
// A collect expression always has default redirection, it's just going to collect
// stdout unless another type of redirection is specified
(None, None)
},
Expr::Subexpression(block_id) | Expr::Block(block_id) => working_set
.get_block(*block_id)
.pipe_redirection(working_set),

View File

@ -224,6 +224,9 @@ impl Expression {
Expr::Signature(_) => false,
Expr::String(_) => false,
Expr::RawString(_) => false,
// A `$in` variable found within a `Collect` is local, as it's already been wrapped
// This is probably unlikely to happen anyway - the expressions are wrapped depth-first
Expr::Collect(_, _) => false,
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
let block = working_set.get_block(*block_id);
@ -387,6 +390,7 @@ impl Expression {
i.replace_span(working_set, replaced, new_span)
}
}
Expr::Collect(_, expr) => expr.replace_span(working_set, replaced, new_span),
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
let mut block = (**working_set.get_block(*block_id)).clone();
@ -457,6 +461,8 @@ impl Expression {
Expr::Operator(_) => {}
// These have their own input
Expr::Block(_) | Expr::Closure(_) => {}
// `$in` in `Collect` has already been handled, so we don't need to check further
Expr::Collect(_, _) => {}
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
let mut block = Block::clone(working_set.get_block(*block_id));
block.replace_in_variable(working_set, new_var_id);

View File

@ -1,4 +1,4 @@
use crate::{ast::Expression, engine::StateWorkingSet, OutDest, Span};
use crate::{ast::Expression, engine::StateWorkingSet, OutDest, Span, VarId};
use serde::{Deserialize, Serialize};
use std::fmt::Display;
@ -66,7 +66,7 @@ impl RedirectionTarget {
pub fn replace_in_variable(
&mut self,
working_set: &mut StateWorkingSet<'_>,
new_var_id: usize,
new_var_id: VarId,
) {
match self {
RedirectionTarget::File { expr, .. } => {
@ -92,7 +92,7 @@ impl PipelineRedirection {
pub fn replace_in_variable(
&mut self,
working_set: &mut StateWorkingSet<'_>,
new_var_id: usize,
new_var_id: VarId,
) {
match self {
PipelineRedirection::Single { source: _, target } => {
@ -154,7 +154,7 @@ impl PipelineElement {
pub fn replace_in_variable(
&mut self,
working_set: &mut StateWorkingSet<'_>,
new_var_id: usize,
new_var_id: VarId,
) {
self.expr.replace_in_variable(working_set, new_var_id);
if let Some(redirection) = &mut self.redirection {

View File

@ -259,6 +259,7 @@ fn expr_to_string(engine_state: &EngineState, expr: &Expr) -> String {
Expr::String(_) | Expr::RawString(_) => "string".to_string(),
Expr::StringInterpolation(_) => "string interpolation".to_string(),
Expr::GlobInterpolation(_, _) => "glob interpolation".to_string(),
Expr::Collect(_, _) => "collect".to_string(),
Expr::Subexpression(_) => "subexpression".to_string(),
Expr::Table(_) => "table".to_string(),
Expr::UnaryNot(_) => "unary not".to_string(),

View File

@ -159,6 +159,9 @@ pub trait Eval {
Expr::ExternalCall(head, args) => {
Self::eval_external_call(state, mut_state, head, args, expr_span)
}
Expr::Collect(var_id, expr) => {
Self::eval_collect::<D>(state, mut_state, *var_id, expr)
}
Expr::Subexpression(block_id) => {
Self::eval_subexpression::<D>(state, mut_state, *block_id, expr_span)
}
@ -356,6 +359,13 @@ pub trait Eval {
span: Span,
) -> Result<Value, ShellError>;
fn eval_collect<D: DebugContext>(
state: Self::State<'_>,
mut_state: &mut Self::MutState,
var_id: VarId,
expr: &Expression,
) -> Result<Value, ShellError>;
fn eval_subexpression<D: DebugContext>(
state: Self::State<'_>,
mut_state: &mut Self::MutState,

View File

@ -422,6 +422,15 @@ impl Eval for EvalConst {
Err(ShellError::NotAConstant { span })
}
fn eval_collect<D: DebugContext>(
_: &StateWorkingSet,
_: &mut (),
_var_id: VarId,
expr: &Expression,
) -> Result<Value, ShellError> {
Err(ShellError::NotAConstant { span: expr.span })
}
fn eval_subexpression<D: DebugContext>(
working_set: &StateWorkingSet,
_: &mut (),

View File

@ -102,6 +102,10 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
let var = FmtVar::new(self.engine_state, *var_id);
write!(f, "{:WIDTH$} {var}, {src}", "store-variable")
}
Instruction::DropVariable { var_id } => {
let var = FmtVar::new(self.engine_state, *var_id);
write!(f, "{:WIDTH$} {var}", "drop-variable")
}
Instruction::LoadEnv { dst, key } => {
let key = FmtData(self.data, *key);
write!(f, "{:WIDTH$} {dst}, {key}", "load-env")

View File

@ -127,6 +127,8 @@ pub enum Instruction {
LoadVariable { dst: RegId, var_id: VarId },
/// Store the value of a variable from the `src` register
StoreVariable { var_id: VarId, src: RegId },
/// Remove a variable from the stack, freeing up whatever resources were associated with it
DropVariable { var_id: VarId },
/// Load the value of an environment variable into the `dst` register
LoadEnv { dst: RegId, key: DataSlice },
/// Load the value of an environment variable into the `dst` register, or `Nothing` if it

View File

@ -329,6 +329,12 @@ fn convert_to_value(
msg: "glob interpolation not supported in nuon".into(),
span: expr.span,
}),
Expr::Collect(..) => Err(ShellError::OutsideSpannedLabeledError {
src: original_text.to_string(),
error: "Error when loading".into(),
msg: "`$in` not supported in nuon".into(),
span: expr.span,
}),
Expr::Subexpression(..) => Err(ShellError::OutsideSpannedLabeledError {
src: original_text.to_string(),
error: "Error when loading".into(),