use a resource pool of preallocated register buffers on the Stack
This commit is contained in:
parent
352095a3b8
commit
aa328d608e
|
@ -1,4 +1,5 @@
|
||||||
use nu_engine::{command_prelude::*, compile};
|
use nu_engine::{command_prelude::*, compile};
|
||||||
|
use nu_protocol::engine::Closure;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ViewIr;
|
pub struct ViewIr;
|
||||||
|
@ -10,33 +11,26 @@ impl Command for ViewIr {
|
||||||
|
|
||||||
fn signature(&self) -> Signature {
|
fn signature(&self) -> Signature {
|
||||||
Signature::new(self.name()).required(
|
Signature::new(self.name()).required(
|
||||||
"block",
|
"closure",
|
||||||
SyntaxShape::Block,
|
SyntaxShape::Closure(None),
|
||||||
"the block to see compiled code for",
|
"the closure to see compiled code for",
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn usage(&self) -> &str {
|
fn usage(&self) -> &str {
|
||||||
"View the compiled IR code for a block"
|
"View the compiled IR code for a block of code"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run(
|
fn run(
|
||||||
&self,
|
&self,
|
||||||
engine_state: &EngineState,
|
engine_state: &EngineState,
|
||||||
_stack: &mut Stack,
|
stack: &mut Stack,
|
||||||
call: &Call,
|
call: &Call,
|
||||||
_input: PipelineData,
|
_input: PipelineData,
|
||||||
) -> Result<PipelineData, ShellError> {
|
) -> Result<PipelineData, ShellError> {
|
||||||
let expr = call
|
let closure: Closure = call.req(engine_state, stack, 0)?;
|
||||||
.positional_nth(0)
|
|
||||||
.ok_or_else(|| ShellError::AccessEmptyContent { span: call.head })?;
|
|
||||||
|
|
||||||
let block_id = expr.as_block().ok_or_else(|| ShellError::TypeMismatch {
|
let block = engine_state.get_block(closure.block_id);
|
||||||
err_message: "expected block".into(),
|
|
||||||
span: expr.span,
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let block = engine_state.get_block(block_id);
|
|
||||||
let ir_block = compile(&StateWorkingSet::new(engine_state), &block)?;
|
let ir_block = compile(&StateWorkingSet::new(engine_state), &block)?;
|
||||||
|
|
||||||
let formatted = format!("{}", ir_block.display(engine_state));
|
let formatted = format!("{}", ir_block.display(engine_state));
|
||||||
|
|
|
@ -18,24 +18,20 @@ pub fn eval_ir_block<D: DebugContext>(
|
||||||
|
|
||||||
let block_span = block.span;
|
let block_span = block.span;
|
||||||
|
|
||||||
// Allocate required space for registers. We prefer to allocate on the stack, but will
|
let mut registers = stack.register_buf_cache.acquire(ir_block.register_count);
|
||||||
// allocate on the heap if it's over the compiled maximum size
|
|
||||||
//
|
let result = eval_ir_block_impl::<D>(
|
||||||
// Keep in mind that there is some code generated for each variant; at least at the moment
|
&mut EvalContext {
|
||||||
// it doesn't seem like LLVM is able to optimize this away
|
engine_state,
|
||||||
//
|
stack,
|
||||||
// This is organized like a tree to try to make sure we do the fewest number of branches
|
registers: &mut registers[..],
|
||||||
let result = if ir_block.register_count <= 8 {
|
},
|
||||||
if ir_block.register_count <= 4 {
|
&block_span,
|
||||||
eval_ir_block_static::<D, 4>(engine_state, stack, &block_span, ir_block, input)
|
ir_block,
|
||||||
} else {
|
input,
|
||||||
eval_ir_block_static::<D, 8>(engine_state, stack, &block_span, ir_block, input)
|
);
|
||||||
}
|
|
||||||
} else if ir_block.register_count <= 16 {
|
stack.register_buf_cache.release(registers);
|
||||||
eval_ir_block_static::<D, 16>(engine_state, stack, &block_span, ir_block, input)
|
|
||||||
} else {
|
|
||||||
eval_ir_block_dynamic::<D>(engine_state, stack, &block_span, ir_block, input)
|
|
||||||
};
|
|
||||||
|
|
||||||
D::leave_block(engine_state, block);
|
D::leave_block(engine_state, block);
|
||||||
|
|
||||||
|
@ -52,51 +48,6 @@ pub fn eval_ir_block<D: DebugContext>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Eval an IR block with stack-allocated registers, the size of which must be known statically.
|
|
||||||
fn eval_ir_block_static<D: DebugContext, const N: usize>(
|
|
||||||
engine_state: &EngineState,
|
|
||||||
stack: &mut Stack,
|
|
||||||
block_span: &Option<Span>,
|
|
||||||
ir_block: &IrBlock,
|
|
||||||
input: PipelineData,
|
|
||||||
) -> Result<PipelineData, ShellError> {
|
|
||||||
log::trace!(
|
|
||||||
"entering block with {} registers on stack ({} requested)",
|
|
||||||
N,
|
|
||||||
ir_block.register_count
|
|
||||||
);
|
|
||||||
const EMPTY: PipelineData = PipelineData::Empty;
|
|
||||||
let mut array = [EMPTY; N];
|
|
||||||
let mut ctx = EvalContext {
|
|
||||||
engine_state,
|
|
||||||
stack,
|
|
||||||
registers: &mut array[..],
|
|
||||||
};
|
|
||||||
eval_ir_block_impl::<D>(&mut ctx, block_span, ir_block, input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Eval an IR block with heap-allocated registers.
|
|
||||||
fn eval_ir_block_dynamic<D: DebugContext>(
|
|
||||||
engine_state: &EngineState,
|
|
||||||
stack: &mut Stack,
|
|
||||||
block_span: &Option<Span>,
|
|
||||||
ir_block: &IrBlock,
|
|
||||||
input: PipelineData,
|
|
||||||
) -> Result<PipelineData, ShellError> {
|
|
||||||
log::trace!(
|
|
||||||
"entering block with {} registers on heap",
|
|
||||||
ir_block.register_count
|
|
||||||
);
|
|
||||||
let mut vec = Vec::with_capacity(ir_block.register_count);
|
|
||||||
vec.extend(std::iter::repeat_with(|| PipelineData::Empty).take(ir_block.register_count));
|
|
||||||
let mut ctx = EvalContext {
|
|
||||||
engine_state,
|
|
||||||
stack,
|
|
||||||
registers: &mut vec[..],
|
|
||||||
};
|
|
||||||
eval_ir_block_impl::<D>(&mut ctx, block_span, ir_block, input)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// All of the pointers necessary for evaluation
|
/// All of the pointers necessary for evaluation
|
||||||
struct EvalContext<'a> {
|
struct EvalContext<'a> {
|
||||||
engine_state: &'a EngineState,
|
engine_state: &'a EngineState,
|
||||||
|
|
|
@ -5,6 +5,7 @@ mod command;
|
||||||
mod engine_state;
|
mod engine_state;
|
||||||
mod overlay;
|
mod overlay;
|
||||||
mod pattern_match;
|
mod pattern_match;
|
||||||
|
mod register_buf_cache;
|
||||||
mod stack;
|
mod stack;
|
||||||
mod stack_out_dest;
|
mod stack_out_dest;
|
||||||
mod state_delta;
|
mod state_delta;
|
||||||
|
@ -20,6 +21,7 @@ pub use command::*;
|
||||||
pub use engine_state::*;
|
pub use engine_state::*;
|
||||||
pub use overlay::*;
|
pub use overlay::*;
|
||||||
pub use pattern_match::*;
|
pub use pattern_match::*;
|
||||||
|
pub use register_buf_cache::*;
|
||||||
pub use stack::*;
|
pub use stack::*;
|
||||||
pub use stack_out_dest::*;
|
pub use stack_out_dest::*;
|
||||||
pub use state_delta::*;
|
pub use state_delta::*;
|
||||||
|
|
62
crates/nu-protocol/src/engine/register_buf_cache.rs
Normal file
62
crates/nu-protocol/src/engine/register_buf_cache.rs
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
use crate::PipelineData;
|
||||||
|
|
||||||
|
/// Retains buffers for reuse in IR evaluation, avoiding heap allocation.
|
||||||
|
///
|
||||||
|
/// This is implemented in such a way that [`Clone`] is still possible, by making the fact that the
|
||||||
|
/// buffers can't be preserved on clone completely transparent. The cached buffers are always empty.
|
||||||
|
pub struct RegisterBufCache {
|
||||||
|
bufs: Vec<Vec<PipelineData>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY: because `bufs` only ever contains empty `Vec`s, it doesn't actually contain any of the
|
||||||
|
// data.
|
||||||
|
unsafe impl Send for RegisterBufCache {}
|
||||||
|
unsafe impl Sync for RegisterBufCache {}
|
||||||
|
|
||||||
|
impl RegisterBufCache {
|
||||||
|
/// Create a new cache with no register buffers.
|
||||||
|
pub const fn new() -> Self {
|
||||||
|
RegisterBufCache { bufs: vec![] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Acquire a new register buffer from the cache. The buffer will be extended to `size` with
|
||||||
|
/// [`Empty`](PipelineData::Empty) elements.
|
||||||
|
pub fn acquire(&mut self, size: usize) -> Vec<PipelineData> {
|
||||||
|
let mut buf = if let Some(buf) = self.bufs.pop() {
|
||||||
|
debug_assert!(buf.is_empty());
|
||||||
|
buf
|
||||||
|
} else {
|
||||||
|
Vec::new()
|
||||||
|
};
|
||||||
|
buf.reserve(size);
|
||||||
|
buf.extend(std::iter::repeat_with(|| PipelineData::Empty).take(size));
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Release a used register buffer to the cache. The buffer will be cleared.
|
||||||
|
pub fn release(&mut self, mut buf: Vec<PipelineData>) {
|
||||||
|
// SAFETY: this `clear` is necessary for the `unsafe impl`s to be safe
|
||||||
|
buf.clear();
|
||||||
|
self.bufs.push(buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for RegisterBufCache {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
let bufs = self.bufs.len();
|
||||||
|
let bytes: usize = self
|
||||||
|
.bufs
|
||||||
|
.iter()
|
||||||
|
.map(|b| b.capacity() * std::mem::size_of::<PipelineData>())
|
||||||
|
.sum();
|
||||||
|
write!(f, "RegisterBufCache({bufs} bufs, {bytes} bytes)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for RegisterBufCache {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
RegisterBufCache::new()
|
||||||
|
}
|
||||||
|
}
|
|
@ -11,6 +11,8 @@ use std::{
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use super::RegisterBufCache;
|
||||||
|
|
||||||
/// Environment variables per overlay
|
/// Environment variables per overlay
|
||||||
pub type EnvVars = HashMap<String, HashMap<String, Value>>;
|
pub type EnvVars = HashMap<String, HashMap<String, Value>>;
|
||||||
|
|
||||||
|
@ -41,6 +43,8 @@ pub struct Stack {
|
||||||
pub env_hidden: HashMap<String, HashSet<String>>,
|
pub env_hidden: HashMap<String, HashSet<String>>,
|
||||||
/// List of active overlays
|
/// List of active overlays
|
||||||
pub active_overlays: Vec<String>,
|
pub active_overlays: Vec<String>,
|
||||||
|
/// Cached register buffers for IR evaluation
|
||||||
|
pub register_buf_cache: RegisterBufCache,
|
||||||
pub recursion_count: u64,
|
pub recursion_count: u64,
|
||||||
pub parent_stack: Option<Arc<Stack>>,
|
pub parent_stack: Option<Arc<Stack>>,
|
||||||
/// Variables that have been deleted (this is used to hide values from parent stack lookups)
|
/// Variables that have been deleted (this is used to hide values from parent stack lookups)
|
||||||
|
@ -68,6 +72,7 @@ impl Stack {
|
||||||
env_vars: Vec::new(),
|
env_vars: Vec::new(),
|
||||||
env_hidden: HashMap::new(),
|
env_hidden: HashMap::new(),
|
||||||
active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()],
|
active_overlays: vec![DEFAULT_OVERLAY_NAME.to_string()],
|
||||||
|
register_buf_cache: RegisterBufCache::new(),
|
||||||
recursion_count: 0,
|
recursion_count: 0,
|
||||||
parent_stack: None,
|
parent_stack: None,
|
||||||
parent_deletions: vec![],
|
parent_deletions: vec![],
|
||||||
|
@ -85,6 +90,7 @@ impl Stack {
|
||||||
env_vars: parent.env_vars.clone(),
|
env_vars: parent.env_vars.clone(),
|
||||||
env_hidden: parent.env_hidden.clone(),
|
env_hidden: parent.env_hidden.clone(),
|
||||||
active_overlays: parent.active_overlays.clone(),
|
active_overlays: parent.active_overlays.clone(),
|
||||||
|
register_buf_cache: RegisterBufCache::new(),
|
||||||
recursion_count: parent.recursion_count,
|
recursion_count: parent.recursion_count,
|
||||||
vars: vec![],
|
vars: vec![],
|
||||||
parent_deletions: vec![],
|
parent_deletions: vec![],
|
||||||
|
@ -254,6 +260,7 @@ impl Stack {
|
||||||
env_vars,
|
env_vars,
|
||||||
env_hidden: self.env_hidden.clone(),
|
env_hidden: self.env_hidden.clone(),
|
||||||
active_overlays: self.active_overlays.clone(),
|
active_overlays: self.active_overlays.clone(),
|
||||||
|
register_buf_cache: RegisterBufCache::new(),
|
||||||
recursion_count: self.recursion_count,
|
recursion_count: self.recursion_count,
|
||||||
parent_stack: None,
|
parent_stack: None,
|
||||||
parent_deletions: vec![],
|
parent_deletions: vec![],
|
||||||
|
@ -284,6 +291,7 @@ impl Stack {
|
||||||
env_vars,
|
env_vars,
|
||||||
env_hidden: self.env_hidden.clone(),
|
env_hidden: self.env_hidden.clone(),
|
||||||
active_overlays: self.active_overlays.clone(),
|
active_overlays: self.active_overlays.clone(),
|
||||||
|
register_buf_cache: RegisterBufCache::new(),
|
||||||
recursion_count: self.recursion_count,
|
recursion_count: self.recursion_count,
|
||||||
parent_stack: None,
|
parent_stack: None,
|
||||||
parent_deletions: vec![],
|
parent_deletions: vec![],
|
||||||
|
|
Loading…
Reference in New Issue
Block a user