From aa328d608e1800eb8cead8d0cc0dad5e6cc2e0de Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Wed, 12 Jun 2024 03:40:15 -0700 Subject: [PATCH] use a resource pool of preallocated register buffers on the Stack --- crates/nu-command/src/debug/view_ir.rs | 22 ++---- crates/nu-engine/src/eval_ir.rs | 77 ++++--------------- crates/nu-protocol/src/engine/mod.rs | 2 + .../src/engine/register_buf_cache.rs | 62 +++++++++++++++ crates/nu-protocol/src/engine/stack.rs | 8 ++ 5 files changed, 94 insertions(+), 77 deletions(-) create mode 100644 crates/nu-protocol/src/engine/register_buf_cache.rs diff --git a/crates/nu-command/src/debug/view_ir.rs b/crates/nu-command/src/debug/view_ir.rs index 85220a2b11..a3ffbbf45a 100644 --- a/crates/nu-command/src/debug/view_ir.rs +++ b/crates/nu-command/src/debug/view_ir.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, compile}; +use nu_protocol::engine::Closure; #[derive(Clone)] pub struct ViewIr; @@ -10,33 +11,26 @@ impl Command for ViewIr { fn signature(&self) -> Signature { Signature::new(self.name()).required( - "block", - SyntaxShape::Block, - "the block to see compiled code for", + "closure", + SyntaxShape::Closure(None), + "the closure to see compiled code for", ) } fn usage(&self) -> &str { - "View the compiled IR code for a block" + "View the compiled IR code for a block of code" } fn run( &self, engine_state: &EngineState, - _stack: &mut Stack, + stack: &mut Stack, call: &Call, _input: PipelineData, ) -> Result { - let expr = call - .positional_nth(0) - .ok_or_else(|| ShellError::AccessEmptyContent { span: call.head })?; + let closure: Closure = call.req(engine_state, stack, 0)?; - let block_id = expr.as_block().ok_or_else(|| ShellError::TypeMismatch { - err_message: "expected block".into(), - span: expr.span, - })?; - - let block = engine_state.get_block(block_id); + let block = engine_state.get_block(closure.block_id); let ir_block = compile(&StateWorkingSet::new(engine_state), &block)?; let formatted = format!("{}", ir_block.display(engine_state)); diff --git a/crates/nu-engine/src/eval_ir.rs b/crates/nu-engine/src/eval_ir.rs index f6bfbcb4d9..de50df10eb 100644 --- a/crates/nu-engine/src/eval_ir.rs +++ b/crates/nu-engine/src/eval_ir.rs @@ -18,24 +18,20 @@ pub fn eval_ir_block( let block_span = block.span; - // Allocate required space for registers. We prefer to allocate on the stack, but will - // allocate on the heap if it's over the compiled maximum size - // - // Keep in mind that there is some code generated for each variant; at least at the moment - // it doesn't seem like LLVM is able to optimize this away - // - // This is organized like a tree to try to make sure we do the fewest number of branches - let result = if ir_block.register_count <= 8 { - if ir_block.register_count <= 4 { - eval_ir_block_static::(engine_state, stack, &block_span, ir_block, input) - } else { - eval_ir_block_static::(engine_state, stack, &block_span, ir_block, input) - } - } else if ir_block.register_count <= 16 { - eval_ir_block_static::(engine_state, stack, &block_span, ir_block, input) - } else { - eval_ir_block_dynamic::(engine_state, stack, &block_span, ir_block, input) - }; + let mut registers = stack.register_buf_cache.acquire(ir_block.register_count); + + let result = eval_ir_block_impl::( + &mut EvalContext { + engine_state, + stack, + registers: &mut registers[..], + }, + &block_span, + ir_block, + input, + ); + + stack.register_buf_cache.release(registers); D::leave_block(engine_state, block); @@ -52,51 +48,6 @@ pub fn eval_ir_block( } } -/// Eval an IR block with stack-allocated registers, the size of which must be known statically. -fn eval_ir_block_static( - engine_state: &EngineState, - stack: &mut Stack, - block_span: &Option, - ir_block: &IrBlock, - input: PipelineData, -) -> Result { - log::trace!( - "entering block with {} registers on stack ({} requested)", - N, - ir_block.register_count - ); - const EMPTY: PipelineData = PipelineData::Empty; - let mut array = [EMPTY; N]; - let mut ctx = EvalContext { - engine_state, - stack, - registers: &mut array[..], - }; - eval_ir_block_impl::(&mut ctx, block_span, ir_block, input) -} - -/// Eval an IR block with heap-allocated registers. -fn eval_ir_block_dynamic( - engine_state: &EngineState, - stack: &mut Stack, - block_span: &Option, - ir_block: &IrBlock, - input: PipelineData, -) -> Result { - log::trace!( - "entering block with {} registers on heap", - ir_block.register_count - ); - let mut vec = Vec::with_capacity(ir_block.register_count); - vec.extend(std::iter::repeat_with(|| PipelineData::Empty).take(ir_block.register_count)); - let mut ctx = EvalContext { - engine_state, - stack, - registers: &mut vec[..], - }; - eval_ir_block_impl::(&mut ctx, block_span, ir_block, input) -} - /// All of the pointers necessary for evaluation struct EvalContext<'a> { engine_state: &'a EngineState, diff --git a/crates/nu-protocol/src/engine/mod.rs b/crates/nu-protocol/src/engine/mod.rs index c6e71afb37..814049a9ce 100644 --- a/crates/nu-protocol/src/engine/mod.rs +++ b/crates/nu-protocol/src/engine/mod.rs @@ -5,6 +5,7 @@ mod command; mod engine_state; mod overlay; mod pattern_match; +mod register_buf_cache; mod stack; mod stack_out_dest; mod state_delta; @@ -20,6 +21,7 @@ pub use command::*; pub use engine_state::*; pub use overlay::*; pub use pattern_match::*; +pub use register_buf_cache::*; pub use stack::*; pub use stack_out_dest::*; pub use state_delta::*; diff --git a/crates/nu-protocol/src/engine/register_buf_cache.rs b/crates/nu-protocol/src/engine/register_buf_cache.rs new file mode 100644 index 0000000000..f2582cf53d --- /dev/null +++ b/crates/nu-protocol/src/engine/register_buf_cache.rs @@ -0,0 +1,62 @@ +use std::fmt; + +use crate::PipelineData; + +/// Retains buffers for reuse in IR evaluation, avoiding heap allocation. +/// +/// This is implemented in such a way that [`Clone`] is still possible, by making the fact that the +/// buffers can't be preserved on clone completely transparent. The cached buffers are always empty. +pub struct RegisterBufCache { + bufs: Vec>, +} + +// SAFETY: because `bufs` only ever contains empty `Vec`s, it doesn't actually contain any of the +// data. +unsafe impl Send for RegisterBufCache {} +unsafe impl Sync for RegisterBufCache {} + +impl RegisterBufCache { + /// Create a new cache with no register buffers. + pub const fn new() -> Self { + RegisterBufCache { bufs: vec![] } + } + + /// Acquire a new register buffer from the cache. The buffer will be extended to `size` with + /// [`Empty`](PipelineData::Empty) elements. + pub fn acquire(&mut self, size: usize) -> Vec { + let mut buf = if let Some(buf) = self.bufs.pop() { + debug_assert!(buf.is_empty()); + buf + } else { + Vec::new() + }; + buf.reserve(size); + buf.extend(std::iter::repeat_with(|| PipelineData::Empty).take(size)); + buf + } + + /// Release a used register buffer to the cache. The buffer will be cleared. + pub fn release(&mut self, mut buf: Vec) { + // SAFETY: this `clear` is necessary for the `unsafe impl`s to be safe + buf.clear(); + self.bufs.push(buf); + } +} + +impl fmt::Debug for RegisterBufCache { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let bufs = self.bufs.len(); + let bytes: usize = self + .bufs + .iter() + .map(|b| b.capacity() * std::mem::size_of::()) + .sum(); + write!(f, "RegisterBufCache({bufs} bufs, {bytes} bytes)") + } +} + +impl Clone for RegisterBufCache { + fn clone(&self) -> Self { + RegisterBufCache::new() + } +} diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index 19726db9c0..8c3f96e827 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -11,6 +11,8 @@ use std::{ sync::Arc, }; +use super::RegisterBufCache; + /// Environment variables per overlay pub type EnvVars = HashMap>; @@ -41,6 +43,8 @@ pub struct Stack { pub env_hidden: HashMap>, /// List of active overlays pub active_overlays: Vec, + /// Cached register buffers for IR evaluation + pub register_buf_cache: RegisterBufCache, pub recursion_count: u64, pub parent_stack: Option>, /// Variables that have been deleted (this is used to hide values from parent stack lookups) @@ -68,6 +72,7 @@ impl Stack { env_vars: Vec::new(), env_hidden: HashMap::new(), active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()], + register_buf_cache: RegisterBufCache::new(), recursion_count: 0, parent_stack: None, parent_deletions: vec![], @@ -85,6 +90,7 @@ impl Stack { env_vars: parent.env_vars.clone(), env_hidden: parent.env_hidden.clone(), active_overlays: parent.active_overlays.clone(), + register_buf_cache: RegisterBufCache::new(), recursion_count: parent.recursion_count, vars: vec![], parent_deletions: vec![], @@ -254,6 +260,7 @@ impl Stack { env_vars, env_hidden: self.env_hidden.clone(), active_overlays: self.active_overlays.clone(), + register_buf_cache: RegisterBufCache::new(), recursion_count: self.recursion_count, parent_stack: None, parent_deletions: vec![], @@ -284,6 +291,7 @@ impl Stack { env_vars, env_hidden: self.env_hidden.clone(), active_overlays: self.active_overlays.clone(), + register_buf_cache: RegisterBufCache::new(), recursion_count: self.recursion_count, parent_stack: None, parent_deletions: vec![],