use a resource pool of preallocated register buffers on the Stack
This commit is contained in:
parent
352095a3b8
commit
aa328d608e
|
@ -1,4 +1,5 @@
|
|||
use nu_engine::{command_prelude::*, compile};
|
||||
use nu_protocol::engine::Closure;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ViewIr;
|
||||
|
@ -10,33 +11,26 @@ impl Command for ViewIr {
|
|||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(self.name()).required(
|
||||
"block",
|
||||
SyntaxShape::Block,
|
||||
"the block to see compiled code for",
|
||||
"closure",
|
||||
SyntaxShape::Closure(None),
|
||||
"the closure to see compiled code for",
|
||||
)
|
||||
}
|
||||
|
||||
fn usage(&self) -> &str {
|
||||
"View the compiled IR code for a block"
|
||||
"View the compiled IR code for a block of code"
|
||||
}
|
||||
|
||||
fn run(
|
||||
&self,
|
||||
engine_state: &EngineState,
|
||||
_stack: &mut Stack,
|
||||
stack: &mut Stack,
|
||||
call: &Call,
|
||||
_input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
let expr = call
|
||||
.positional_nth(0)
|
||||
.ok_or_else(|| ShellError::AccessEmptyContent { span: call.head })?;
|
||||
let closure: Closure = call.req(engine_state, stack, 0)?;
|
||||
|
||||
let block_id = expr.as_block().ok_or_else(|| ShellError::TypeMismatch {
|
||||
err_message: "expected block".into(),
|
||||
span: expr.span,
|
||||
})?;
|
||||
|
||||
let block = engine_state.get_block(block_id);
|
||||
let block = engine_state.get_block(closure.block_id);
|
||||
let ir_block = compile(&StateWorkingSet::new(engine_state), &block)?;
|
||||
|
||||
let formatted = format!("{}", ir_block.display(engine_state));
|
||||
|
|
|
@ -18,24 +18,20 @@ pub fn eval_ir_block<D: DebugContext>(
|
|||
|
||||
let block_span = block.span;
|
||||
|
||||
// Allocate required space for registers. We prefer to allocate on the stack, but will
|
||||
// allocate on the heap if it's over the compiled maximum size
|
||||
//
|
||||
// Keep in mind that there is some code generated for each variant; at least at the moment
|
||||
// it doesn't seem like LLVM is able to optimize this away
|
||||
//
|
||||
// This is organized like a tree to try to make sure we do the fewest number of branches
|
||||
let result = if ir_block.register_count <= 8 {
|
||||
if ir_block.register_count <= 4 {
|
||||
eval_ir_block_static::<D, 4>(engine_state, stack, &block_span, ir_block, input)
|
||||
} else {
|
||||
eval_ir_block_static::<D, 8>(engine_state, stack, &block_span, ir_block, input)
|
||||
}
|
||||
} else if ir_block.register_count <= 16 {
|
||||
eval_ir_block_static::<D, 16>(engine_state, stack, &block_span, ir_block, input)
|
||||
} else {
|
||||
eval_ir_block_dynamic::<D>(engine_state, stack, &block_span, ir_block, input)
|
||||
};
|
||||
let mut registers = stack.register_buf_cache.acquire(ir_block.register_count);
|
||||
|
||||
let result = eval_ir_block_impl::<D>(
|
||||
&mut EvalContext {
|
||||
engine_state,
|
||||
stack,
|
||||
registers: &mut registers[..],
|
||||
},
|
||||
&block_span,
|
||||
ir_block,
|
||||
input,
|
||||
);
|
||||
|
||||
stack.register_buf_cache.release(registers);
|
||||
|
||||
D::leave_block(engine_state, block);
|
||||
|
||||
|
@ -52,51 +48,6 @@ pub fn eval_ir_block<D: DebugContext>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Eval an IR block with stack-allocated registers, the size of which must be known statically.
|
||||
fn eval_ir_block_static<D: DebugContext, const N: usize>(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
block_span: &Option<Span>,
|
||||
ir_block: &IrBlock,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
log::trace!(
|
||||
"entering block with {} registers on stack ({} requested)",
|
||||
N,
|
||||
ir_block.register_count
|
||||
);
|
||||
const EMPTY: PipelineData = PipelineData::Empty;
|
||||
let mut array = [EMPTY; N];
|
||||
let mut ctx = EvalContext {
|
||||
engine_state,
|
||||
stack,
|
||||
registers: &mut array[..],
|
||||
};
|
||||
eval_ir_block_impl::<D>(&mut ctx, block_span, ir_block, input)
|
||||
}
|
||||
|
||||
/// Eval an IR block with heap-allocated registers.
|
||||
fn eval_ir_block_dynamic<D: DebugContext>(
|
||||
engine_state: &EngineState,
|
||||
stack: &mut Stack,
|
||||
block_span: &Option<Span>,
|
||||
ir_block: &IrBlock,
|
||||
input: PipelineData,
|
||||
) -> Result<PipelineData, ShellError> {
|
||||
log::trace!(
|
||||
"entering block with {} registers on heap",
|
||||
ir_block.register_count
|
||||
);
|
||||
let mut vec = Vec::with_capacity(ir_block.register_count);
|
||||
vec.extend(std::iter::repeat_with(|| PipelineData::Empty).take(ir_block.register_count));
|
||||
let mut ctx = EvalContext {
|
||||
engine_state,
|
||||
stack,
|
||||
registers: &mut vec[..],
|
||||
};
|
||||
eval_ir_block_impl::<D>(&mut ctx, block_span, ir_block, input)
|
||||
}
|
||||
|
||||
/// All of the pointers necessary for evaluation
|
||||
struct EvalContext<'a> {
|
||||
engine_state: &'a EngineState,
|
||||
|
|
|
@ -5,6 +5,7 @@ mod command;
|
|||
mod engine_state;
|
||||
mod overlay;
|
||||
mod pattern_match;
|
||||
mod register_buf_cache;
|
||||
mod stack;
|
||||
mod stack_out_dest;
|
||||
mod state_delta;
|
||||
|
@ -20,6 +21,7 @@ pub use command::*;
|
|||
pub use engine_state::*;
|
||||
pub use overlay::*;
|
||||
pub use pattern_match::*;
|
||||
pub use register_buf_cache::*;
|
||||
pub use stack::*;
|
||||
pub use stack_out_dest::*;
|
||||
pub use state_delta::*;
|
||||
|
|
62
crates/nu-protocol/src/engine/register_buf_cache.rs
Normal file
62
crates/nu-protocol/src/engine/register_buf_cache.rs
Normal file
|
@ -0,0 +1,62 @@
|
|||
use std::fmt;
|
||||
|
||||
use crate::PipelineData;
|
||||
|
||||
/// Retains buffers for reuse in IR evaluation, avoiding heap allocation.
|
||||
///
|
||||
/// This is implemented in such a way that [`Clone`] is still possible, by making the fact that the
|
||||
/// buffers can't be preserved on clone completely transparent. The cached buffers are always empty.
|
||||
pub struct RegisterBufCache {
|
||||
bufs: Vec<Vec<PipelineData>>,
|
||||
}
|
||||
|
||||
// SAFETY: because `bufs` only ever contains empty `Vec`s, it doesn't actually contain any of the
|
||||
// data.
|
||||
unsafe impl Send for RegisterBufCache {}
|
||||
unsafe impl Sync for RegisterBufCache {}
|
||||
|
||||
impl RegisterBufCache {
|
||||
/// Create a new cache with no register buffers.
|
||||
pub const fn new() -> Self {
|
||||
RegisterBufCache { bufs: vec![] }
|
||||
}
|
||||
|
||||
/// Acquire a new register buffer from the cache. The buffer will be extended to `size` with
|
||||
/// [`Empty`](PipelineData::Empty) elements.
|
||||
pub fn acquire(&mut self, size: usize) -> Vec<PipelineData> {
|
||||
let mut buf = if let Some(buf) = self.bufs.pop() {
|
||||
debug_assert!(buf.is_empty());
|
||||
buf
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
buf.reserve(size);
|
||||
buf.extend(std::iter::repeat_with(|| PipelineData::Empty).take(size));
|
||||
buf
|
||||
}
|
||||
|
||||
/// Release a used register buffer to the cache. The buffer will be cleared.
|
||||
pub fn release(&mut self, mut buf: Vec<PipelineData>) {
|
||||
// SAFETY: this `clear` is necessary for the `unsafe impl`s to be safe
|
||||
buf.clear();
|
||||
self.bufs.push(buf);
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for RegisterBufCache {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let bufs = self.bufs.len();
|
||||
let bytes: usize = self
|
||||
.bufs
|
||||
.iter()
|
||||
.map(|b| b.capacity() * std::mem::size_of::<PipelineData>())
|
||||
.sum();
|
||||
write!(f, "RegisterBufCache({bufs} bufs, {bytes} bytes)")
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for RegisterBufCache {
|
||||
fn clone(&self) -> Self {
|
||||
RegisterBufCache::new()
|
||||
}
|
||||
}
|
|
@ -11,6 +11,8 @@ use std::{
|
|||
sync::Arc,
|
||||
};
|
||||
|
||||
use super::RegisterBufCache;
|
||||
|
||||
/// Environment variables per overlay
|
||||
pub type EnvVars = HashMap<String, HashMap<String, Value>>;
|
||||
|
||||
|
@ -41,6 +43,8 @@ pub struct Stack {
|
|||
pub env_hidden: HashMap<String, HashSet<String>>,
|
||||
/// List of active overlays
|
||||
pub active_overlays: Vec<String>,
|
||||
/// Cached register buffers for IR evaluation
|
||||
pub register_buf_cache: RegisterBufCache,
|
||||
pub recursion_count: u64,
|
||||
pub parent_stack: Option<Arc<Stack>>,
|
||||
/// Variables that have been deleted (this is used to hide values from parent stack lookups)
|
||||
|
@ -68,6 +72,7 @@ impl Stack {
|
|||
env_vars: Vec::new(),
|
||||
env_hidden: HashMap::new(),
|
||||
active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()],
|
||||
register_buf_cache: RegisterBufCache::new(),
|
||||
recursion_count: 0,
|
||||
parent_stack: None,
|
||||
parent_deletions: vec![],
|
||||
|
@ -85,6 +90,7 @@ impl Stack {
|
|||
env_vars: parent.env_vars.clone(),
|
||||
env_hidden: parent.env_hidden.clone(),
|
||||
active_overlays: parent.active_overlays.clone(),
|
||||
register_buf_cache: RegisterBufCache::new(),
|
||||
recursion_count: parent.recursion_count,
|
||||
vars: vec![],
|
||||
parent_deletions: vec![],
|
||||
|
@ -254,6 +260,7 @@ impl Stack {
|
|||
env_vars,
|
||||
env_hidden: self.env_hidden.clone(),
|
||||
active_overlays: self.active_overlays.clone(),
|
||||
register_buf_cache: RegisterBufCache::new(),
|
||||
recursion_count: self.recursion_count,
|
||||
parent_stack: None,
|
||||
parent_deletions: vec![],
|
||||
|
@ -284,6 +291,7 @@ impl Stack {
|
|||
env_vars,
|
||||
env_hidden: self.env_hidden.clone(),
|
||||
active_overlays: self.active_overlays.clone(),
|
||||
register_buf_cache: RegisterBufCache::new(),
|
||||
recursion_count: self.recursion_count,
|
||||
parent_stack: None,
|
||||
parent_deletions: vec![],
|
||||
|
|
Loading…
Reference in New Issue
Block a user