diff --git a/crates/nu-engine/src/compile/mod.rs b/crates/nu-engine/src/compile/mod.rs index b93e922674..4c94e8f1ff 100644 --- a/crates/nu-engine/src/compile/mod.rs +++ b/crates/nu-engine/src/compile/mod.rs @@ -1,12 +1,10 @@ -use std::sync::Arc; - use nu_protocol::{ ast::{ Argument, Block, Call, CellPath, Expr, Expression, Operator, PathMember, Pipeline, PipelineRedirection, RedirectionSource, RedirectionTarget, }, engine::StateWorkingSet, - ir::{Instruction, IrBlock, Literal, RedirectMode}, + ir::{DataSlice, Instruction, IrBlock, Literal, RedirectMode}, IntoSpanned, OutDest, RegId, ShellError, Span, Spanned, ENV_VARIABLE_ID, }; @@ -264,7 +262,10 @@ fn compile_expression( Expr::Bool(b) => lit(builder, Literal::Bool(*b)), Expr::Int(i) => lit(builder, Literal::Int(*i)), Expr::Float(f) => lit(builder, Literal::Float(*f)), - Expr::Binary(bin) => lit(builder, Literal::Binary(bin.as_slice().into())), + Expr::Binary(bin) => { + let data_slice = builder.data(bin)?; + lit(builder, Literal::Binary(data_slice)) + } Expr::Range(_) => Err(CompileError::Todo("Range")), Expr::Var(var_id) => builder.push( Instruction::LoadVariable { @@ -333,29 +334,44 @@ fn compile_expression( Expr::Keyword(_) => Err(CompileError::Todo("Keyword")), Expr::ValueWithUnit(_) => Err(CompileError::Todo("ValueWithUnit")), Expr::DateTime(_) => Err(CompileError::Todo("DateTime")), - Expr::Filepath(path, no_expand) => lit( - builder, - Literal::Filepath { - val: path.as_str().into(), - no_expand: *no_expand, - }, - ), - Expr::Directory(path, no_expand) => lit( - builder, - Literal::Directory { - val: path.as_str().into(), - no_expand: *no_expand, - }, - ), - Expr::GlobPattern(path, no_expand) => lit( - builder, - Literal::GlobPattern { - val: path.as_str().into(), - no_expand: *no_expand, - }, - ), - Expr::String(s) => lit(builder, Literal::String(s.as_str().into())), - Expr::RawString(rs) => lit(builder, Literal::RawString(rs.as_str().into())), + Expr::Filepath(path, no_expand) => { + let val = builder.data(path)?; + lit( + builder, + Literal::Filepath { + val, + no_expand: *no_expand, + }, + ) + } + Expr::Directory(path, no_expand) => { + let val = builder.data(path)?; + lit( + builder, + Literal::Directory { + val, + no_expand: *no_expand, + }, + ) + } + Expr::GlobPattern(path, no_expand) => { + let val = builder.data(path)?; + lit( + builder, + Literal::GlobPattern { + val, + no_expand: *no_expand, + }, + ) + } + Expr::String(s) => { + let data_slice = builder.data(s)?; + lit(builder, Literal::String(data_slice)) + } + Expr::RawString(rs) => { + let data_slice = builder.data(rs)?; + lit(builder, Literal::RawString(data_slice)) + } Expr::CellPath(path) => lit(builder, Literal::CellPath(Box::new(path.clone()))), Expr::FullCellPath(full_cell_path) => { if matches!(full_cell_path.head.expr, Expr::Var(ENV_VARIABLE_ID)) { @@ -411,9 +427,9 @@ fn compile_call( // We could technically compile anything that isn't another call safely without worrying about // the argument state, but we'd have to check all of that first and it just isn't really worth // it. - enum CompiledArg { + enum CompiledArg<'a> { Positional(RegId, Span), - Named(Arc, Option, Span), + Named(&'a str, Option, Span), Spread(RegId, Span), } @@ -443,11 +459,9 @@ fn compile_call( arg_reg.expect("expr() None in non-Named"), arg.span(), )), - Argument::Named((name, _, _)) => compiled_args.push(CompiledArg::Named( - name.item.as_str().into(), - arg_reg, - arg.span(), - )), + Argument::Named((name, _, _)) => { + compiled_args.push(CompiledArg::Named(name.item.as_str(), arg_reg, arg.span())) + } Argument::Unknown(_) => return Err(CompileError::Garbage), Argument::Spread(_) => compiled_args.push(CompiledArg::Spread( arg_reg.expect("expr() None in non-Named"), @@ -463,9 +477,11 @@ fn compile_call( builder.push(Instruction::PushPositional { src: reg }.into_spanned(span))? } CompiledArg::Named(name, Some(reg), span) => { + let name = builder.data(name)?; builder.push(Instruction::PushNamed { name, src: reg }.into_spanned(span))? } CompiledArg::Named(name, None, span) => { + let name = builder.data(name)?; builder.push(Instruction::PushFlag { name }.into_spanned(span))? } CompiledArg::Spread(reg, span) => { @@ -566,7 +582,7 @@ fn compile_load_env( ) } else { let (key, optional) = match &path[0] { - PathMember::String { val, optional, .. } => (val.as_str().into(), *optional), + PathMember::String { val, optional, .. } => (builder.data(val)?, *optional), PathMember::Int { span, .. } => return Err(CompileError::AccessEnvByInt(*span)), }; let tail = &path[1..]; @@ -603,6 +619,7 @@ fn compile_load_env( enum CompileError { RegisterOverflow, RegisterUninitialized(RegId), + DataOverflow, InvalidRedirectMode, Garbage, UnsupportedOperatorExpression, @@ -617,6 +634,9 @@ impl CompileError { CompileError::RegisterUninitialized(reg_id) => { format!("register {reg_id} is uninitialized when used, possibly reused") } + CompileError::DataOverflow => { + format!("block contains too much string data: maximum 4 GiB exceeded") + } CompileError::InvalidRedirectMode => { "invalid redirect mode: File should not be specified by commands".into() } @@ -639,6 +659,7 @@ impl CompileError { struct BlockBuilder { instructions: Vec, spans: Vec, + data: Vec, register_allocation_state: Vec, } @@ -648,6 +669,7 @@ impl BlockBuilder { BlockBuilder { instructions: vec![], spans: vec![], + data: vec![], register_allocation_state: vec![true], } } @@ -732,6 +754,12 @@ impl BlockBuilder { decl_id: _, src_dst: _, } => (), + Instruction::ListPush { src_dst: _, item } => self.free_register(*item)?, + Instruction::RecordInsert { + src_dst: _, + key: _, + val, + } => self.free_register(*val)?, Instruction::BinaryOp { lhs_dst: _, op: _, @@ -786,11 +814,27 @@ impl BlockBuilder { self.load_literal(reg_id, Literal::Nothing.into_spanned(Span::unknown())) } + /// Add data to the `data` array and return a [`DataSlice`] referencing it. + fn data(&mut self, data: impl AsRef<[u8]>) -> Result { + let start = self.data.len(); + if start + data.as_ref().len() < u32::MAX as usize { + let slice = DataSlice { + start: start as u32, + len: data.as_ref().len() as u32, + }; + self.data.extend_from_slice(data.as_ref()); + Ok(slice) + } else { + Err(CompileError::DataOverflow) + } + } + /// Consume the builder and produce the final [`IrBlock`]. fn finish(self) -> IrBlock { IrBlock { instructions: self.instructions, spans: self.spans, + data: self.data.into(), register_count: self.register_allocation_state.len(), } } diff --git a/crates/nu-engine/src/eval_ir.rs b/crates/nu-engine/src/eval_ir.rs index b174ac82cf..b0a00d9ec5 100644 --- a/crates/nu-engine/src/eval_ir.rs +++ b/crates/nu-engine/src/eval_ir.rs @@ -1,13 +1,13 @@ -use std::fs::File; +use std::{fs::File, sync::Arc}; use nu_path::expand_path_with; use nu_protocol::{ ast::{Bits, Block, Boolean, CellPath, Comparison, Math, Operator}, debugger::DebugContext, engine::{Argument, Closure, EngineState, Redirection, Stack}, - ir::{Call, Instruction, IrBlock, Literal, RedirectMode}, - DeclId, IntoPipelineData, IntoSpanned, OutDest, PipelineData, RegId, ShellError, Span, Value, - VarId, + ir::{Call, DataSlice, Instruction, IrBlock, Literal, RedirectMode}, + DeclId, IntoPipelineData, IntoSpanned, OutDest, PipelineData, Range, Record, RegId, ShellError, + Span, Value, VarId, }; use crate::eval::is_automatic_env_var; @@ -31,6 +31,7 @@ pub fn eval_ir_block( &mut EvalContext { engine_state, stack, + data: &ir_block.data, args_base, redirect_out: None, redirect_err: None, @@ -62,6 +63,7 @@ pub fn eval_ir_block( struct EvalContext<'a> { engine_state: &'a EngineState, stack: &'a mut Stack, + data: &'a Arc<[u8]>, /// Base index on the argument stack to reset to after a call args_base: usize, /// State set by redirect-out @@ -89,6 +91,14 @@ impl<'a> EvalContext<'a> { let span = data.span().unwrap_or(fallback_span); data.into_value(span) } + + /// Get a string from data or produce evaluation error if it's invalid UTF-8 + fn get_str(&self, slice: DataSlice, error_span: Span) -> Result<&'a str, ShellError> { + std::str::from_utf8(&self.data[slice]).map_err(|_| ShellError::IrEvalError { + msg: format!("data slice does not refer to valid UTF-8: {slice:?}"), + span: Some(error_span), + }) + } } /// Eval an IR block on the provided slice of registers. @@ -108,7 +118,10 @@ fn eval_ir_block_impl( while pc < ir_block.instructions.len() { let instruction = &ir_block.instructions[pc]; let span = &ir_block.spans[pc]; - log::trace!("{pc:-4}: {}", instruction.display(ctx.engine_state)); + log::trace!( + "{pc:-4}: {}", + instruction.display(ctx.engine_state, ctx.data) + ); match eval_instruction(ctx, instruction, span)? { InstructionResult::Continue => { pc += 1; @@ -201,17 +214,19 @@ fn eval_instruction( Ok(Continue) } Instruction::LoadEnv { dst, key } => { + let key = ctx.get_str(*key, *span)?; if let Some(value) = ctx.stack.get_env_var(ctx.engine_state, key) { ctx.put_reg(*dst, value.into_pipeline_data()); Ok(Continue) } else { Err(ShellError::EnvVarNotFoundAtRuntime { - envvar_name: key.clone().into(), + envvar_name: key.into(), span: *span, }) } } Instruction::LoadEnvOpt { dst, key } => { + let key = ctx.get_str(*key, *span)?; let value = ctx .stack .get_env_var(ctx.engine_state, key) @@ -220,13 +235,14 @@ fn eval_instruction( Ok(Continue) } Instruction::StoreEnv { key, src } => { + let key = ctx.get_str(*key, *span)?; let value = ctx.collect_reg(*src, *span)?; if !is_automatic_env_var(key) { - ctx.stack.add_env_var(key.clone().into(), value); + ctx.stack.add_env_var(key.into(), value); Ok(Continue) } else { Err(ShellError::AutomaticEnvVarSetManually { - envvar_name: key.clone().into(), + envvar_name: key.into(), span: *span, }) } @@ -247,7 +263,8 @@ fn eval_instruction( } Instruction::PushFlag { name } => { ctx.stack.argument_stack.push(Argument::Flag { - name: name.clone(), + data: ctx.data.clone(), + name: *name, span: *span, }); Ok(Continue) @@ -255,7 +272,8 @@ fn eval_instruction( Instruction::PushNamed { name, src } => { let val = ctx.collect_reg(*src, *span)?; ctx.stack.argument_stack.push(Argument::Named { - name: name.clone(), + data: ctx.data.clone(), + name: *name, span: *span, val, }); @@ -277,6 +295,27 @@ fn eval_instruction( ctx.put_reg(*src_dst, result); Ok(Continue) } + Instruction::ListPush { src_dst, item } => { + let list_value = ctx.collect_reg(*src_dst, *span)?; + let item = ctx.collect_reg(*item, *span)?; + let list_span = list_value.span(); + let mut list = list_value.into_list()?; + list.push(item); + ctx.put_reg(*src_dst, Value::list(list, list_span).into_pipeline_data()); + Ok(Continue) + } + Instruction::RecordInsert { src_dst, key, val } => { + let record_value = ctx.collect_reg(*src_dst, *span)?; + let val = ctx.collect_reg(*val, *span)?; + let record_span = record_value.span(); + let mut record = record_value.into_record()?; + record.insert(ctx.get_str(*key, *span)?, val); + ctx.put_reg( + *src_dst, + Value::record(record, record_span).into_pipeline_data(), + ); + Ok(Continue) + } Instruction::BinaryOp { lhs_dst, op, rhs } => binary_op(ctx, *lhs_dst, op, *rhs, *span), Instruction::FollowCellPath { src_dst, path } => { let data = ctx.take_reg(*src_dst); @@ -378,7 +417,7 @@ fn literal_value( Literal::Bool(b) => Value::bool(*b, span), Literal::Int(i) => Value::int(*i, span), Literal::Float(f) => Value::float(*f, span), - Literal::Binary(bin) => Value::binary(bin.clone(), span), + Literal::Binary(bin) => Value::binary(&ctx.data[*bin], span), Literal::Block(block_id) => Value::closure( Closure { block_id: *block_id, @@ -401,22 +440,30 @@ fn literal_value( span, ) } - Literal::List(literals) => { - let mut vec = Vec::with_capacity(literals.len()); - for elem in literals.iter() { - vec.push(literal_value(ctx, &elem.item, elem.span)?); - } - Value::list(vec, span) + Literal::Range { + start, + step, + end, + inclusion, + } => { + let start = ctx.collect_reg(*start, span)?; + let step = ctx.collect_reg(*step, span)?; + let end = ctx.collect_reg(*end, span)?; + let range = Range::new(start, step, end, *inclusion, span)?; + Value::range(range, span) } + Literal::List { capacity } => Value::list(Vec::with_capacity(*capacity), span), + Literal::Record { capacity } => Value::record(Record::with_capacity(*capacity), span), Literal::Filepath { val: path, no_expand, } => { + let path = ctx.get_str(*path, span)?; if *no_expand { - Value::string(path.as_ref(), span) + Value::string(path, span) } else { let cwd = ctx.engine_state.cwd(Some(ctx.stack))?; - let path = expand_path_with(path.as_ref(), cwd, true); + let path = expand_path_with(path, cwd, true); Value::string(path.to_string_lossy(), span) } @@ -425,20 +472,23 @@ fn literal_value( val: path, no_expand, } => { - if path.as_ref() == "-" { + let path = ctx.get_str(*path, span)?; + if path == "-" { Value::string("-", span) } else if *no_expand { - Value::string(path.as_ref(), span) + Value::string(path, span) } else { let cwd = ctx.engine_state.cwd(Some(ctx.stack)).unwrap_or_default(); - let path = expand_path_with(path.as_ref(), cwd, true); + let path = expand_path_with(path, cwd, true); Value::string(path.to_string_lossy(), span) } } - Literal::GlobPattern { val, no_expand } => Value::glob(val.as_ref(), *no_expand, span), - Literal::String(s) => Value::string(s.clone(), span), - Literal::RawString(s) => Value::string(s.clone(), span), + Literal::GlobPattern { val, no_expand } => { + Value::glob(ctx.get_str(*val, span)?, *no_expand, span) + } + Literal::String(s) => Value::string(ctx.get_str(*s, span)?, span), + Literal::RawString(s) => Value::string(ctx.get_str(*s, span)?, span), Literal::CellPath(path) => Value::cell_path(CellPath::clone(&path), span), Literal::Nothing => Value::nothing(span), }) diff --git a/crates/nu-parser/src/known_external.rs b/crates/nu-parser/src/known_external.rs index 11b5a3128d..75d141c90c 100644 --- a/crates/nu-parser/src/known_external.rs +++ b/crates/nu-parser/src/known_external.rs @@ -159,17 +159,28 @@ fn ir_call_to_extern_call( // Add the arguments, reformatting named arguments into string positionals for index in 0..call.args_len { match &call.arguments(stack)[index] { - engine::Argument::Flag { name, span } => { + engine::Argument::Flag { data, name, span } => { let name_arg = engine::Argument::Positional { span: *span, - val: Value::string(name.as_ref(), *span), + val: Value::string( + std::str::from_utf8(&data[*name]).expect("invalid flag name"), + *span, + ), }; extern_call.add_argument(stack, name_arg); } - engine::Argument::Named { name, span, val } => { + engine::Argument::Named { + data, + name, + span, + val, + } => { let name_arg = engine::Argument::Positional { span: *span, - val: Value::string(name.as_ref(), *span), + val: Value::string( + std::str::from_utf8(&data[*name]).expect("invalid arg name"), + *span, + ), }; let val_arg = engine::Argument::Positional { span: *span, diff --git a/crates/nu-protocol/src/engine/argument.rs b/crates/nu-protocol/src/engine/argument.rs index 501635126b..09e932bc18 100644 --- a/crates/nu-protocol/src/engine/argument.rs +++ b/crates/nu-protocol/src/engine/argument.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use crate::{Span, Value}; +use crate::{ir::DataSlice, Span, Value}; /// Represents a fully evaluated argument to a call. #[derive(Debug, Clone)] @@ -16,12 +16,14 @@ pub enum Argument { }, /// A named argument with no value, e.g. `--flag` Flag { - name: Arc, + data: Arc<[u8]>, + name: DataSlice, span: Span, }, /// A named argument with a value, e.g. `--flag value` or `--flag=` Named { - name: Arc, + data: Arc<[u8]>, + name: DataSlice, span: Span, val: Value, }, diff --git a/crates/nu-protocol/src/ir/call.rs b/crates/nu-protocol/src/ir/call.rs index 52b73b45f9..643d195868 100644 --- a/crates/nu-protocol/src/ir/call.rs +++ b/crates/nu-protocol/src/ir/call.rs @@ -1,8 +1,12 @@ +use std::sync::Arc; + use crate::{ engine::{self, Argument, Stack}, DeclId, ShellError, Span, Spanned, Value, }; +use super::DataSlice; + /// Contains the information for a call being made to a declared command. #[derive(Debug, Clone)] pub struct Call { @@ -59,24 +63,30 @@ impl Call { } pub fn named_iter<'a>( - &self, + &'a self, stack: &'a Stack, ) -> impl Iterator, Option<&'a Value>)> + 'a { self.arguments(stack).iter().filter_map( |arg: &Argument| -> Option<(Spanned<&str>, Option<&Value>)> { match arg { - Argument::Flag { name, span, .. } => Some(( + Argument::Flag { + data, name, span, .. + } => Some(( Spanned { - item: name, + item: std::str::from_utf8(&data[*name]).expect("invalid arg name"), span: *span, }, None, )), Argument::Named { - name, span, val, .. + data, + name, + span, + val, + .. } => Some(( Spanned { - item: name, + item: std::str::from_utf8(&data[*name]).expect("invalid arg name"), span: *span, }, Some(val), @@ -88,8 +98,20 @@ impl Call { } pub fn get_named_arg<'a>(&self, stack: &'a Stack, flag_name: &str) -> Option<&'a Value> { - self.named_iter(stack) - .find_map(|(name, val)| (name.item == flag_name).then_some(val)) + // Optimized to avoid str::from_utf8() + self.arguments(stack) + .iter() + .find_map(|arg: &Argument| -> Option> { + match arg { + Argument::Flag { data, name, .. } if &data[*name] == flag_name.as_bytes() => { + Some(None) + } + Argument::Named { + data, name, val, .. + } if &data[*name] == flag_name.as_bytes() => Some(Some(val)), + _ => None, + } + }) .flatten() } @@ -192,13 +214,12 @@ impl CallBuilder { /// Add a flag (no-value named) argument to the [`Stack`] and reference it from the [`Call`]. pub fn add_flag(&mut self, stack: &mut Stack, name: impl AsRef, span: Span) -> &mut Self { - self.add_argument( - stack, - Argument::Flag { - name: name.as_ref().into(), - span, - }, - ) + let data: Arc<[u8]> = name.as_ref().as_bytes().into(); + let name = DataSlice { + start: 0, + len: data.len().try_into().expect("flag name too big"), + }; + self.add_argument(stack, Argument::Flag { data, name, span }) } /// Add a named argument to the [`Stack`] and reference it from the [`Call`]. @@ -209,10 +230,16 @@ impl CallBuilder { span: Span, val: Value, ) -> &mut Self { + let data: Arc<[u8]> = name.as_ref().as_bytes().into(); + let name = DataSlice { + start: 0, + len: data.len().try_into().expect("arg name too big"), + }; self.add_argument( stack, Argument::Named { - name: name.as_ref().into(), + data, + name, span, val, }, diff --git a/crates/nu-protocol/src/ir/display.rs b/crates/nu-protocol/src/ir/display.rs index a630206fe6..0c4e8e7961 100644 --- a/crates/nu-protocol/src/ir/display.rs +++ b/crates/nu-protocol/src/ir/display.rs @@ -2,7 +2,7 @@ use std::fmt; use crate::{engine::EngineState, DeclId, VarId}; -use super::{Instruction, IrBlock, RedirectMode}; +use super::{DataSlice, Instruction, IrBlock, Literal, RedirectMode}; pub struct FmtIrBlock<'a> { pub(super) engine_state: &'a EngineState, @@ -14,11 +14,13 @@ impl<'a> fmt::Display for FmtIrBlock<'a> { let plural = |count| if count == 1 { "" } else { "s" }; writeln!( f, - "# {} register{}, {} instruction{}", + "# {} register{}, {} instruction{}, {} byte{} of data", self.ir_block.register_count, plural(self.ir_block.register_count), self.ir_block.instructions.len(), plural(self.ir_block.instructions.len()), + self.ir_block.data.len(), + plural(self.ir_block.data.len()), )?; for (index, instruction) in self.ir_block.instructions.iter().enumerate() { writeln!( @@ -27,7 +29,8 @@ impl<'a> fmt::Display for FmtIrBlock<'a> { index, FmtInstruction { engine_state: self.engine_state, - instruction + instruction, + data: &self.ir_block.data, } )?; } @@ -38,6 +41,7 @@ impl<'a> fmt::Display for FmtIrBlock<'a> { pub struct FmtInstruction<'a> { pub(super) engine_state: &'a EngineState, pub(super) instruction: &'a Instruction, + pub(super) data: &'a [u8], } impl<'a> fmt::Display for FmtInstruction<'a> { @@ -46,7 +50,11 @@ impl<'a> fmt::Display for FmtInstruction<'a> { match self.instruction { Instruction::LoadLiteral { dst, lit } => { - write!(f, "{:WIDTH$} {dst}, {lit:?}", "load-literal") + let lit = FmtLiteral { + literal: lit, + data: self.data, + }; + write!(f, "{:WIDTH$} {dst}, {lit}", "load-literal") } Instruction::Move { dst, src } => { write!(f, "{:WIDTH$} {dst}, {src}", "move") @@ -69,13 +77,16 @@ impl<'a> fmt::Display for FmtInstruction<'a> { write!(f, "{:WIDTH$} {var}, {src}", "store-variable") } Instruction::LoadEnv { dst, key } => { - write!(f, "{:WIDTH$} {dst}, {key:?}", "load-env") + let key = FmtData(self.data, *key); + write!(f, "{:WIDTH$} {dst}, {key}", "load-env") } Instruction::LoadEnvOpt { dst, key } => { - write!(f, "{:WIDTH$} {dst}, {key:?}", "load-env-opt") + let key = FmtData(self.data, *key); + write!(f, "{:WIDTH$} {dst}, {key}", "load-env-opt") } Instruction::StoreEnv { key, src } => { - write!(f, "{:WIDTH$} {key:?}, {src}", "store-env") + let key = FmtData(self.data, *key); + write!(f, "{:WIDTH$} {key}, {src}", "store-env") } Instruction::PushPositional { src } => { write!(f, "{:WIDTH$} {src}", "push-positional") @@ -84,10 +95,12 @@ impl<'a> fmt::Display for FmtInstruction<'a> { write!(f, "{:WIDTH$} {src}", "append-rest") } Instruction::PushFlag { name } => { - write!(f, "{:WIDTH$} {name:?}", "push-flag") + let name = FmtData(self.data, *name); + write!(f, "{:WIDTH$} {name}", "push-flag") } Instruction::PushNamed { name, src } => { - write!(f, "{:WIDTH$} {name:?}, {src}", "push-named") + let name = FmtData(self.data, *name); + write!(f, "{:WIDTH$} {name}, {src}", "push-named") } Instruction::RedirectOut { mode } => { write!(f, "{:WIDTH$} {mode}", "redirect-out") @@ -99,6 +112,13 @@ impl<'a> fmt::Display for FmtInstruction<'a> { let decl = FmtDecl::new(self.engine_state, *decl_id); write!(f, "{:WIDTH$} {decl}, {src_dst}", "call") } + Instruction::ListPush { src_dst, item } => { + write!(f, "{:WIDTH$} {src_dst}, {item}", "list-push") + } + Instruction::RecordInsert { src_dst, key, val } => { + let key = FmtData(self.data, *key); + write!(f, "{:WIDTH$} {src_dst}, {key}, {val}", "record-insert") + } Instruction::BinaryOp { lhs_dst, op, rhs } => { write!(f, "{:WIDTH$} {lhs_dst}, {op:?}, {rhs}", "binary-op") } @@ -170,7 +190,7 @@ impl fmt::Display for FmtVar<'_> { } } -impl std::fmt::Display for RedirectMode { +impl fmt::Display for RedirectMode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { RedirectMode::Pipe => write!(f, "pipe"), @@ -181,3 +201,62 @@ impl std::fmt::Display for RedirectMode { } } } + +struct FmtData<'a>(&'a [u8], DataSlice); + +impl<'a> fmt::Display for FmtData<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Ok(s) = std::str::from_utf8(&self.0[self.1]) { + // Write as string + write!(f, "{s:?}") + } else { + // Write as byte array + write!(f, "0x{:x?}", self.0) + } + } +} + +struct FmtLiteral<'a> { + literal: &'a Literal, + data: &'a [u8], +} + +impl<'a> fmt::Display for FmtLiteral<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.literal { + Literal::Bool(b) => write!(f, "bool({b:?})"), + Literal::Int(i) => write!(f, "int({i:?})"), + Literal::Float(fl) => write!(f, "float({fl:?})"), + Literal::Binary(b) => write!(f, "binary({})", FmtData(self.data, *b)), + Literal::Block(id) => write!(f, "block({id})"), + Literal::Closure(id) => write!(f, "closure({id})"), + Literal::Range { + start, + step, + end, + inclusion, + } => write!(f, "range({start}, {step}, {end}, {inclusion:?})"), + Literal::List { capacity } => write!(f, "list(capacity = {capacity})"), + Literal::Record { capacity } => write!(f, "record(capacity = {capacity})"), + Literal::Filepath { val, no_expand } => write!( + f, + "filepath({}, no_expand = {no_expand:?})", + FmtData(self.data, *val) + ), + Literal::Directory { val, no_expand } => write!( + f, + "directory({}, no_expand = {no_expand:?})", + FmtData(self.data, *val) + ), + Literal::GlobPattern { val, no_expand } => write!( + f, + "glob-pattern({}, no_expand = {no_expand:?})", + FmtData(self.data, *val) + ), + Literal::String(s) => write!(f, "string({})", FmtData(self.data, *s)), + Literal::RawString(rs) => write!(f, "raw-string({})", FmtData(self.data, *rs)), + Literal::CellPath(p) => write!(f, "cell-path({p})"), + Literal::Nothing => write!(f, "nothing"), + } + } +} diff --git a/crates/nu-protocol/src/ir/mod.rs b/crates/nu-protocol/src/ir/mod.rs index e358a8af97..77f604e2d5 100644 --- a/crates/nu-protocol/src/ir/mod.rs +++ b/crates/nu-protocol/src/ir/mod.rs @@ -1,9 +1,9 @@ use std::sync::Arc; use crate::{ - ast::{CellPath, Operator}, + ast::{CellPath, Operator, RangeInclusion}, engine::EngineState, - BlockId, DeclId, RegId, Span, Spanned, VarId, + BlockId, DeclId, RegId, Span, VarId, }; use serde::{Deserialize, Serialize}; @@ -18,6 +18,8 @@ pub use display::{FmtInstruction, FmtIrBlock}; pub struct IrBlock { pub instructions: Vec, pub spans: Vec, + #[serde(with = "serde_arc_u8_array")] + pub data: Arc<[u8]>, pub register_count: usize, } @@ -32,6 +34,22 @@ impl IrBlock { } } +/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store +/// string data that a block uses. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub struct DataSlice { + pub start: u32, + pub len: u32, +} + +impl std::ops::Index for [u8] { + type Output = [u8]; + + fn index(&self, index: DataSlice) -> &Self::Output { + &self[index.start as usize..(index.start as usize + index.len as usize)] + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub enum Instruction { /// Load a literal value into the `dst` register @@ -49,27 +67,20 @@ pub enum Instruction { /// Store the value of a variable from the `src` register StoreVariable { var_id: VarId, src: RegId }, /// Load the value of an environment variable into the `dst` register - LoadEnv { dst: RegId, key: Box }, + LoadEnv { dst: RegId, key: DataSlice }, /// Load the value of an environment variable into the `dst` register, or `Nothing` if it /// doesn't exist - LoadEnvOpt { dst: RegId, key: Box }, + LoadEnvOpt { dst: RegId, key: DataSlice }, /// Store the value of an environment variable from the `src` register - StoreEnv { key: Box, src: RegId }, + StoreEnv { key: DataSlice, src: RegId }, /// Add a positional arg to the next call PushPositional { src: RegId }, /// Add a list of args to the next call (spread/rest) AppendRest { src: RegId }, /// Add a named arg with no value to the next call. - PushFlag { - #[serde(with = "serde_arc_str")] - name: Arc, - }, + PushFlag { name: DataSlice }, /// Add a named arg with a value to the next call. - PushNamed { - #[serde(with = "serde_arc_str")] - name: Arc, - src: RegId, - }, + PushNamed { name: DataSlice, src: RegId }, /// Set the redirection for stdout for the next call (only) RedirectOut { mode: RedirectMode }, /// Set the redirection for stderr for the next call (only) @@ -77,6 +88,14 @@ pub enum Instruction { /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`, /// overwriting it. The argument stack is used implicitly and cleared when the call ends. Call { decl_id: DeclId, src_dst: RegId }, + /// Push a value onto the end of a list. Used to construct list literals. + ListPush { src_dst: RegId, item: RegId }, + /// Insert a key-value pair into a record. Any existing value for the key is overwritten. + RecordInsert { + src_dst: RegId, + key: DataSlice, + val: RegId, + }, /// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to /// `lhs_dst`. BinaryOp { @@ -96,7 +115,6 @@ pub enum Instruction { path: RegId, new_value: RegId, }, - /// Update a cell path /// Jump to an offset in this block Jump { index: usize }, /// Branch to an offset in this block if the value of the `cond` register is a true boolean, @@ -109,10 +127,15 @@ pub enum Instruction { impl Instruction { /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed /// listing of the instruction. - pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtInstruction<'a> { + pub fn display<'a>( + &'a self, + engine_state: &'a EngineState, + data: &'a [u8], + ) -> FmtInstruction<'a> { FmtInstruction { engine_state, instruction: self, + data, } } } @@ -128,15 +151,35 @@ pub enum Literal { Bool(bool), Int(i64), Float(f64), - Binary(Box<[u8]>), + Binary(DataSlice), Block(BlockId), Closure(BlockId), - List(Box<[Spanned]>), - Filepath { val: Box, no_expand: bool }, - Directory { val: Box, no_expand: bool }, - GlobPattern { val: Box, no_expand: bool }, - String(Box), - RawString(Box), + Range { + start: RegId, + step: RegId, + end: RegId, + inclusion: RangeInclusion, + }, + List { + capacity: usize, + }, + Record { + capacity: usize, + }, + Filepath { + val: DataSlice, + no_expand: bool, + }, + Directory { + val: DataSlice, + no_expand: bool, + }, + GlobPattern { + val: DataSlice, + no_expand: bool, + }, + String(DataSlice), + RawString(DataSlice), CellPath(Box), Nothing, } @@ -163,23 +206,23 @@ pub enum RedirectMode { }, } -/// Just a hack to allow `Arc` to be serialized and deserialized -mod serde_arc_str { +/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized +mod serde_arc_u8_array { use serde::{Deserialize, Serialize}; use std::sync::Arc; - pub fn serialize(string: &Arc, ser: S) -> Result + pub fn serialize(data: &Arc<[u8]>, ser: S) -> Result where S: serde::Serializer, { - string.as_ref().serialize(ser) + data.as_ref().serialize(ser) } - pub fn deserialize<'de, D>(de: D) -> Result, D::Error> + pub fn deserialize<'de, D>(de: D) -> Result, D::Error> where D: serde::Deserializer<'de>, { - let string: &'de str = Deserialize::deserialize(de)?; - Ok(string.into()) + let data: &'de [u8] = Deserialize::deserialize(de)?; + Ok(data.into()) } }