From e982c12a2f3336d7c0bc6a9a032275c80ea00975 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Thu, 11 Jul 2024 10:36:17 -0700 Subject: [PATCH] wip: compile `collect` to plain IR --- .../nu-cmd-lang/src/core_commands/collect.rs | 75 ++++++++----------- crates/nu-engine/src/compile/call.rs | 3 + crates/nu-engine/src/compile/keyword.rs | 57 ++++++++++++++ crates/nu-engine/src/eval_ir.rs | 18 ++++- crates/nu-parser/src/parser.rs | 9 --- 5 files changed, 104 insertions(+), 58 deletions(-) diff --git a/crates/nu-cmd-lang/src/core_commands/collect.rs b/crates/nu-cmd-lang/src/core_commands/collect.rs index d6282eec35..96f9c0adaa 100644 --- a/crates/nu-cmd-lang/src/core_commands/collect.rs +++ b/crates/nu-cmd-lang/src/core_commands/collect.rs @@ -1,5 +1,5 @@ -use nu_engine::{command_prelude::*, get_eval_block, redirect_env}; -use nu_protocol::{engine::Closure, DataSource, PipelineMetadata}; +use nu_engine::{command_prelude::*, get_eval_block}; +use nu_protocol::{engine::CommandType, DataSource, PipelineMetadata}; #[derive(Clone)] pub struct Collect; @@ -13,16 +13,15 @@ impl Command for Collect { Signature::build("collect") .input_output_types(vec![(Type::Any, Type::Any)]) .optional( - "closure", - SyntaxShape::Closure(Some(vec![SyntaxShape::Any])), - "The closure to run once the stream is collected.", + "block", + SyntaxShape::Block, + "The block to run once the stream is collected.", ) - .switch( - "keep-env", - "let the closure affect environment variables", - None, - ) - .category(Category::Filters) + .category(Category::Core) + } + + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn usage(&self) -> &str { @@ -30,7 +29,7 @@ impl Command for Collect { } fn extra_usage(&self) -> &str { - r#"If provided, run a closure with the collected value as input. + r#"If provided, run a block with the collected value as input. The entire stream will be collected into one value in memory, so if the stream is particularly large, this can cause high memory usage."# @@ -43,7 +42,12 @@ is particularly large, this can cause high memory usage."# call: &Call, input: PipelineData, ) -> Result { - let closure: Option = call.opt(engine_state, stack, 0)?; + // This is compiled specially by the IR compiler. The code here is never used when + // running in IR mode. + let call = call.assert_ast_call()?; + let block_id = call + .positional_nth(0) + .map(|expr| expr.as_block().expect("checked through parser")); let metadata = match input.metadata() { // Remove the `FilePath` metadata, because after `collect` it's no longer necessary to @@ -58,40 +62,21 @@ is particularly large, this can cause high memory usage."# let input = input.into_value(call.head)?; let result; - if let Some(closure) = closure { - let block = engine_state.get_block(closure.block_id); - let mut stack_captures = - stack.captures_to_stack_preserve_out_dest(closure.captures.clone()); - - let mut saved_positional = None; - if let Some(var) = block.signature.get_positional(0) { - if let Some(var_id) = &var.var_id { - stack_captures.add_var(*var_id, input.clone()); - saved_positional = Some(*var_id); - } - } - + if let Some(block_id) = block_id { + let block = engine_state.get_block(block_id); let eval_block = get_eval_block(engine_state); - result = eval_block( - engine_state, - &mut stack_captures, - block, - input.into_pipeline_data_with_metadata(metadata), - ); - - if call.has_flag(engine_state, stack, "keep-env")? { - redirect_env(engine_state, stack, &stack_captures); - // for when we support `data | let x = $in;` - // remove the variables added earlier - for (var_id, _) in closure.captures { - stack_captures.remove_var(var_id); - } - if let Some(u) = saved_positional { - stack_captures.remove_var(u); - } - // add any new variables to the stack - stack.vars.extend(stack_captures.vars); + if let Some(var_id) = block.signature.get_positional(0).and_then(|var| var.var_id) { + stack.add_var(var_id, input); + result = eval_block(engine_state, stack, block, PipelineData::Empty); + stack.remove_var(var_id); + } else { + result = eval_block( + engine_state, + stack, + block, + input.into_pipeline_data_with_metadata(metadata), + ); } } else { result = Ok(input.into_pipeline_data_with_metadata(metadata)); diff --git a/crates/nu-engine/src/compile/call.rs b/crates/nu-engine/src/compile/call.rs index d9f1b8e581..92748d3b3d 100644 --- a/crates/nu-engine/src/compile/call.rs +++ b/crates/nu-engine/src/compile/call.rs @@ -50,6 +50,9 @@ pub(crate) fn compile_call( "let" | "mut" => { return compile_let(working_set, builder, call, redirect_modes, io_reg); } + "collect" => { + return compile_collect(working_set, builder, call, redirect_modes, io_reg); + } "try" => { return compile_try(working_set, builder, call, redirect_modes, io_reg); } diff --git a/crates/nu-engine/src/compile/keyword.rs b/crates/nu-engine/src/compile/keyword.rs index 12f4a54c10..f9e4e55898 100644 --- a/crates/nu-engine/src/compile/keyword.rs +++ b/crates/nu-engine/src/compile/keyword.rs @@ -350,6 +350,63 @@ pub(crate) fn compile_let( Ok(()) } +/// Compile a call to `collect` +pub(crate) fn compile_collect( + working_set: &StateWorkingSet, + builder: &mut BlockBuilder, + call: &Call, + redirect_modes: RedirectModes, + io_reg: RegId, +) -> Result<(), CompileError> { + let block_id = call + .positional_nth(0) + .map(|expr| { + expr.as_block().ok_or(CompileError::UnexpectedExpression { + expr_name: format!("{:?}", expr), + span: expr.span, + }) + }) + .transpose()?; + + if let Some(block_id) = block_id { + let block = working_set.get_block(block_id); + + if let Some(var_id) = block.signature.get_positional(0).and_then(|var| var.var_id) { + // Pseudocode: + // + // store-variable $var, %io_reg + // ...... + builder.push( + Instruction::StoreVariable { + var_id, + src: io_reg, + } + .into_spanned(block.span.unwrap_or(call.head)), + )?; + compile_block(working_set, builder, block, redirect_modes, None, io_reg) + } else { + // Pseudocode: + // + // collect %io_reg + // ...... + builder.push(Instruction::Collect { src_dst: io_reg }.into_spanned(call.head))?; + compile_block( + working_set, + builder, + block, + redirect_modes, + Some(io_reg), + io_reg, + ) + } + } else { + // Pseudocode: + // + // collect %io_reg + builder.push(Instruction::Collect { src_dst: io_reg }.into_spanned(call.head)) + } +} + /// Compile a call to `try`, setting an error handler over the evaluated block pub(crate) fn compile_try( working_set: &StateWorkingSet, diff --git a/crates/nu-engine/src/eval_ir.rs b/crates/nu-engine/src/eval_ir.rs index 56bbccee25..adee65b001 100644 --- a/crates/nu-engine/src/eval_ir.rs +++ b/crates/nu-engine/src/eval_ir.rs @@ -6,9 +6,9 @@ use nu_protocol::{ debugger::DebugContext, engine::{Argument, Closure, EngineState, ErrorHandler, Matcher, Redirection, Stack}, ir::{Call, DataSlice, Instruction, IrAstRef, IrBlock, Literal, RedirectMode}, - record, ByteStreamSource, DeclId, ErrSpan, Flag, IntoPipelineData, IntoSpanned, ListStream, - OutDest, PipelineData, PositionalArg, Range, Record, RegId, ShellError, Signals, Signature, - Span, Spanned, Type, Value, VarId, ENV_VARIABLE_ID, + record, ByteStreamSource, DataSource, DeclId, ErrSpan, Flag, IntoPipelineData, IntoSpanned, + ListStream, OutDest, PipelineData, PipelineMetadata, PositionalArg, Range, Record, RegId, + ShellError, Signals, Signature, Span, Spanned, Type, Value, VarId, ENV_VARIABLE_ID, }; use nu_utils::IgnoreCaseExt; @@ -1332,9 +1332,19 @@ fn get_env_var_name_case_insensitive<'a>(ctx: &mut EvalContext<'_>, key: &'a str } /// Helper to collect values into [`PipelineData`], preserving original span and metadata +/// +/// The metadata is removed if it is the file data source, as that's just meant to mark streams. fn collect(data: PipelineData, fallback_span: Span) -> Result { let span = data.span().unwrap_or(fallback_span); - let metadata = data.metadata(); + let metadata = match data.metadata() { + // Remove the `FilePath` metadata, because after `collect` it's no longer necessary to + // check where some input came from. + Some(PipelineMetadata { + data_source: DataSource::FilePath(_), + content_type: None, + }) => None, + other => other, + }; let value = data.into_value(span)?; Ok(PipelineData::Value(value, metadata)) } diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 9ba7038987..103b5c9974 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -6340,15 +6340,6 @@ fn wrap_expr_with_collect(working_set: &mut StateWorkingSet, expr: &Expression) Type::Any, ))); - output.push(Argument::Named(( - Spanned { - item: "keep-env".to_string(), - span: Span::new(0, 0), - }, - None, - None, - ))); - // The containing, synthetic call to `collect`. // We don't want to have a real span as it will confuse flattening // The args are where we'll get the real info