use a resource pool of preallocated register buffers on the Stack

Devyn Cairns 2024-06-12 03:40:15 -07:00
parent 352095a3b8
commit aa328d608e
5 changed files with 94 additions and 77 deletions
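In outline, the change moves register allocation for IR evaluation onto a pool owned by the Stack: evaluating a block acquires a buffer of PipelineData registers from stack.register_buf_cache, runs the block, and releases the buffer back for reuse. A condensed sketch of the new flow, using the names from the diff below rather than the verbatim code:

    // Condensed sketch of eval_ir_block after this change; debug hooks and
    // error plumbing omitted.
    let mut registers = stack.register_buf_cache.acquire(ir_block.register_count);
    let result = eval_ir_block_impl::<D>(
        &mut EvalContext { engine_state, stack, registers: &mut registers[..] },
        &block_span,
        ir_block,
        input,
    );
    stack.register_buf_cache.release(registers); // cleared, then kept for the next block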

View File

@@ -1,4 +1,5 @@
 use nu_engine::{command_prelude::*, compile};
+use nu_protocol::engine::Closure;
 
 #[derive(Clone)]
 pub struct ViewIr;
@@ -10,33 +11,26 @@ impl Command for ViewIr {
     fn signature(&self) -> Signature {
         Signature::new(self.name()).required(
-            "block",
-            SyntaxShape::Block,
-            "the block to see compiled code for",
+            "closure",
+            SyntaxShape::Closure(None),
+            "the closure to see compiled code for",
         )
     }
 
     fn usage(&self) -> &str {
-        "View the compiled IR code for a block"
+        "View the compiled IR code for a block of code"
     }
 
     fn run(
         &self,
         engine_state: &EngineState,
-        _stack: &mut Stack,
+        stack: &mut Stack,
         call: &Call,
         _input: PipelineData,
     ) -> Result<PipelineData, ShellError> {
-        let expr = call
-            .positional_nth(0)
-            .ok_or_else(|| ShellError::AccessEmptyContent { span: call.head })?;
-        let block_id = expr.as_block().ok_or_else(|| ShellError::TypeMismatch {
-            err_message: "expected block".into(),
-            span: expr.span,
-        })?;
-        let block = engine_state.get_block(block_id);
+        let closure: Closure = call.req(engine_state, stack, 0)?;
+        let block = engine_state.get_block(closure.block_id);
         let ir_block = compile(&StateWorkingSet::new(engine_state), &block)?;
         let formatted = format!("{}", ir_block.display(engine_state));
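With the signature switched from a block to a closure, the command resolves its argument through the normal call.req machinery and then looks up the closure's block, instead of inspecting the raw positional expression. An invocation along the lines of view ir { 1 + 2 } (hypothetical example) would print the compiled IR for that closure.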

View File

@@ -18,24 +18,20 @@ pub fn eval_ir_block<D: DebugContext>(
     let block_span = block.span;
 
-    // Allocate required space for registers. We prefer to allocate on the stack, but will
-    // allocate on the heap if it's over the compiled maximum size
-    //
-    // Keep in mind that there is some code generated for each variant; at least at the moment
-    // it doesn't seem like LLVM is able to optimize this away
-    //
-    // This is organized like a tree to try to make sure we do the fewest number of branches
-    let result = if ir_block.register_count <= 8 {
-        if ir_block.register_count <= 4 {
-            eval_ir_block_static::<D, 4>(engine_state, stack, &block_span, ir_block, input)
-        } else {
-            eval_ir_block_static::<D, 8>(engine_state, stack, &block_span, ir_block, input)
-        }
-    } else if ir_block.register_count <= 16 {
-        eval_ir_block_static::<D, 16>(engine_state, stack, &block_span, ir_block, input)
-    } else {
-        eval_ir_block_dynamic::<D>(engine_state, stack, &block_span, ir_block, input)
-    };
+    let mut registers = stack.register_buf_cache.acquire(ir_block.register_count);
+
+    let result = eval_ir_block_impl::<D>(
+        &mut EvalContext {
+            engine_state,
+            stack,
+            registers: &mut registers[..],
+        },
+        &block_span,
+        ir_block,
+        input,
+    );
+
+    stack.register_buf_cache.release(registers);
 
     D::leave_block(engine_state, block);
@@ -52,51 +48,6 @@ pub fn eval_ir_block<D: DebugContext>(
     }
 }
 
-/// Eval an IR block with stack-allocated registers, the size of which must be known statically.
-fn eval_ir_block_static<D: DebugContext, const N: usize>(
-    engine_state: &EngineState,
-    stack: &mut Stack,
-    block_span: &Option<Span>,
-    ir_block: &IrBlock,
-    input: PipelineData,
-) -> Result<PipelineData, ShellError> {
-    log::trace!(
-        "entering block with {} registers on stack ({} requested)",
-        N,
-        ir_block.register_count
-    );
-
-    const EMPTY: PipelineData = PipelineData::Empty;
-    let mut array = [EMPTY; N];
-    let mut ctx = EvalContext {
-        engine_state,
-        stack,
-        registers: &mut array[..],
-    };
-    eval_ir_block_impl::<D>(&mut ctx, block_span, ir_block, input)
-}
-
-/// Eval an IR block with heap-allocated registers.
-fn eval_ir_block_dynamic<D: DebugContext>(
-    engine_state: &EngineState,
-    stack: &mut Stack,
-    block_span: &Option<Span>,
-    ir_block: &IrBlock,
-    input: PipelineData,
-) -> Result<PipelineData, ShellError> {
-    log::trace!(
-        "entering block with {} registers on heap",
-        ir_block.register_count
-    );
-    let mut vec = Vec::with_capacity(ir_block.register_count);
-    vec.extend(std::iter::repeat_with(|| PipelineData::Empty).take(ir_block.register_count));
-    let mut ctx = EvalContext {
-        engine_state,
-        stack,
-        registers: &mut vec[..],
-    };
-    eval_ir_block_impl::<D>(&mut ctx, block_span, ir_block, input)
-}
-
 /// All of the pointers necessary for evaluation
 struct EvalContext<'a> {
     engine_state: &'a EngineState,

View File

@@ -5,6 +5,7 @@ mod command;
 mod engine_state;
 mod overlay;
 mod pattern_match;
+mod register_buf_cache;
 mod stack;
 mod stack_out_dest;
 mod state_delta;
@@ -20,6 +21,7 @@ pub use command::*;
 pub use engine_state::*;
 pub use overlay::*;
 pub use pattern_match::*;
+pub use register_buf_cache::*;
 pub use stack::*;
 pub use stack_out_dest::*;
 pub use state_delta::*;

View File

@@ -0,0 +1,62 @@
+use std::fmt;
+
+use crate::PipelineData;
+
+/// Retains buffers for reuse in IR evaluation, avoiding heap allocation.
+///
+/// This is implemented in such a way that [`Clone`] is still possible, by making the fact that the
+/// buffers can't be preserved on clone completely transparent. The cached buffers are always empty.
+pub struct RegisterBufCache {
+    bufs: Vec<Vec<PipelineData>>,
+}
+
+// SAFETY: because `bufs` only ever contains empty `Vec`s, it doesn't actually contain any of the
+// data.
+unsafe impl Send for RegisterBufCache {}
+unsafe impl Sync for RegisterBufCache {}
+
+impl RegisterBufCache {
+    /// Create a new cache with no register buffers.
+    pub const fn new() -> Self {
+        RegisterBufCache { bufs: vec![] }
+    }
+
+    /// Acquire a new register buffer from the cache. The buffer will be extended to `size` with
+    /// [`Empty`](PipelineData::Empty) elements.
+    pub fn acquire(&mut self, size: usize) -> Vec<PipelineData> {
+        let mut buf = if let Some(buf) = self.bufs.pop() {
+            debug_assert!(buf.is_empty());
+            buf
+        } else {
+            Vec::new()
+        };
+        buf.reserve(size);
+        buf.extend(std::iter::repeat_with(|| PipelineData::Empty).take(size));
+        buf
+    }
+
+    /// Release a used register buffer to the cache. The buffer will be cleared.
+    pub fn release(&mut self, mut buf: Vec<PipelineData>) {
+        // SAFETY: this `clear` is necessary for the `unsafe impl`s to be safe
+        buf.clear();
+        self.bufs.push(buf);
+    }
+}
+
+impl fmt::Debug for RegisterBufCache {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let bufs = self.bufs.len();
+        let bytes: usize = self
+            .bufs
+            .iter()
+            .map(|b| b.capacity() * std::mem::size_of::<PipelineData>())
+            .sum();
+        write!(f, "RegisterBufCache({bufs} bufs, {bytes} bytes)")
+    }
+}
+
+impl Clone for RegisterBufCache {
+    fn clone(&self) -> Self {
+        RegisterBufCache::new()
+    }
+}
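To make the reuse behavior concrete, here is a minimal sketch of the acquire/release round trip, not part of the commit, assuming it lives somewhere inside nu-protocol where these types are in scope:

    use crate::{engine::RegisterBufCache, PipelineData};

    // Hypothetical illustration of how a released buffer's allocation is reused.
    fn register_buf_cache_sketch() {
        let mut cache = RegisterBufCache::new();

        // First acquire allocates a fresh buffer of 16 PipelineData::Empty slots.
        let regs: Vec<PipelineData> = cache.acquire(16);
        assert_eq!(regs.len(), 16);

        // Release clears the contents but parks the allocation in the pool.
        cache.release(regs);

        // The next acquire reuses that allocation instead of touching the heap again.
        let regs = cache.acquire(8);
        assert!(regs.capacity() >= 16);

        // Cloning yields an empty cache, as the Debug output reports.
        assert_eq!(format!("{:?}", cache.clone()), "RegisterBufCache(0 bufs, 0 bytes)");
    }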

View File

@@ -11,6 +11,8 @@ use std::{
     sync::Arc,
 };
 
+use super::RegisterBufCache;
+
 /// Environment variables per overlay
 pub type EnvVars = HashMap<String, HashMap<String, Value>>;
@@ -41,6 +43,8 @@ pub struct Stack {
     pub env_hidden: HashMap<String, HashSet<String>>,
     /// List of active overlays
     pub active_overlays: Vec<String>,
+    /// Cached register buffers for IR evaluation
+    pub register_buf_cache: RegisterBufCache,
     pub recursion_count: u64,
     pub parent_stack: Option<Arc<Stack>>,
     /// Variables that have been deleted (this is used to hide values from parent stack lookups)
@@ -68,6 +72,7 @@ impl Stack {
             env_vars: Vec::new(),
             env_hidden: HashMap::new(),
             active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()],
+            register_buf_cache: RegisterBufCache::new(),
             recursion_count: 0,
             parent_stack: None,
             parent_deletions: vec![],
@@ -85,6 +90,7 @@ impl Stack {
             env_vars: parent.env_vars.clone(),
             env_hidden: parent.env_hidden.clone(),
             active_overlays: parent.active_overlays.clone(),
+            register_buf_cache: RegisterBufCache::new(),
             recursion_count: parent.recursion_count,
             vars: vec![],
             parent_deletions: vec![],
@@ -254,6 +260,7 @@ impl Stack {
            env_vars,
            env_hidden: self.env_hidden.clone(),
            active_overlays: self.active_overlays.clone(),
+           register_buf_cache: RegisterBufCache::new(),
            recursion_count: self.recursion_count,
            parent_stack: None,
            parent_deletions: vec![],
@@ -284,6 +291,7 @@ impl Stack {
            env_vars,
            env_hidden: self.env_hidden.clone(),
            active_overlays: self.active_overlays.clone(),
+           register_buf_cache: RegisterBufCache::new(),
            recursion_count: self.recursion_count,
            parent_stack: None,
            parent_deletions: vec![],