more efficient storage for string data in ir blocks

This commit is contained in:
Devyn Cairns 2024-06-26 01:23:46 -07:00
parent 4fe74e3f8c
commit 7890b3f27a
No known key found for this signature in database
7 changed files with 378 additions and 122 deletions

View File

@ -1,12 +1,10 @@
use std::sync::Arc;
use nu_protocol::{
ast::{
Argument, Block, Call, CellPath, Expr, Expression, Operator, PathMember, Pipeline,
PipelineRedirection, RedirectionSource, RedirectionTarget,
},
engine::StateWorkingSet,
ir::{Instruction, IrBlock, Literal, RedirectMode},
ir::{DataSlice, Instruction, IrBlock, Literal, RedirectMode},
IntoSpanned, OutDest, RegId, ShellError, Span, Spanned, ENV_VARIABLE_ID,
};
@ -264,7 +262,10 @@ fn compile_expression(
Expr::Bool(b) => lit(builder, Literal::Bool(*b)),
Expr::Int(i) => lit(builder, Literal::Int(*i)),
Expr::Float(f) => lit(builder, Literal::Float(*f)),
Expr::Binary(bin) => lit(builder, Literal::Binary(bin.as_slice().into())),
Expr::Binary(bin) => {
let data_slice = builder.data(bin)?;
lit(builder, Literal::Binary(data_slice))
}
Expr::Range(_) => Err(CompileError::Todo("Range")),
Expr::Var(var_id) => builder.push(
Instruction::LoadVariable {
@ -333,29 +334,44 @@ fn compile_expression(
Expr::Keyword(_) => Err(CompileError::Todo("Keyword")),
Expr::ValueWithUnit(_) => Err(CompileError::Todo("ValueWithUnit")),
Expr::DateTime(_) => Err(CompileError::Todo("DateTime")),
Expr::Filepath(path, no_expand) => lit(
builder,
Literal::Filepath {
val: path.as_str().into(),
no_expand: *no_expand,
},
),
Expr::Directory(path, no_expand) => lit(
builder,
Literal::Directory {
val: path.as_str().into(),
no_expand: *no_expand,
},
),
Expr::GlobPattern(path, no_expand) => lit(
builder,
Literal::GlobPattern {
val: path.as_str().into(),
no_expand: *no_expand,
},
),
Expr::String(s) => lit(builder, Literal::String(s.as_str().into())),
Expr::RawString(rs) => lit(builder, Literal::RawString(rs.as_str().into())),
Expr::Filepath(path, no_expand) => {
let val = builder.data(path)?;
lit(
builder,
Literal::Filepath {
val,
no_expand: *no_expand,
},
)
}
Expr::Directory(path, no_expand) => {
let val = builder.data(path)?;
lit(
builder,
Literal::Directory {
val,
no_expand: *no_expand,
},
)
}
Expr::GlobPattern(path, no_expand) => {
let val = builder.data(path)?;
lit(
builder,
Literal::GlobPattern {
val,
no_expand: *no_expand,
},
)
}
Expr::String(s) => {
let data_slice = builder.data(s)?;
lit(builder, Literal::String(data_slice))
}
Expr::RawString(rs) => {
let data_slice = builder.data(rs)?;
lit(builder, Literal::RawString(data_slice))
}
Expr::CellPath(path) => lit(builder, Literal::CellPath(Box::new(path.clone()))),
Expr::FullCellPath(full_cell_path) => {
if matches!(full_cell_path.head.expr, Expr::Var(ENV_VARIABLE_ID)) {
@ -411,9 +427,9 @@ fn compile_call(
// We could technically compile anything that isn't another call safely without worrying about
// the argument state, but we'd have to check all of that first and it just isn't really worth
// it.
enum CompiledArg {
enum CompiledArg<'a> {
Positional(RegId, Span),
Named(Arc<str>, Option<RegId>, Span),
Named(&'a str, Option<RegId>, Span),
Spread(RegId, Span),
}
@ -443,11 +459,9 @@ fn compile_call(
arg_reg.expect("expr() None in non-Named"),
arg.span(),
)),
Argument::Named((name, _, _)) => compiled_args.push(CompiledArg::Named(
name.item.as_str().into(),
arg_reg,
arg.span(),
)),
Argument::Named((name, _, _)) => {
compiled_args.push(CompiledArg::Named(name.item.as_str(), arg_reg, arg.span()))
}
Argument::Unknown(_) => return Err(CompileError::Garbage),
Argument::Spread(_) => compiled_args.push(CompiledArg::Spread(
arg_reg.expect("expr() None in non-Named"),
@ -463,9 +477,11 @@ fn compile_call(
builder.push(Instruction::PushPositional { src: reg }.into_spanned(span))?
}
CompiledArg::Named(name, Some(reg), span) => {
let name = builder.data(name)?;
builder.push(Instruction::PushNamed { name, src: reg }.into_spanned(span))?
}
CompiledArg::Named(name, None, span) => {
let name = builder.data(name)?;
builder.push(Instruction::PushFlag { name }.into_spanned(span))?
}
CompiledArg::Spread(reg, span) => {
@ -566,7 +582,7 @@ fn compile_load_env(
)
} else {
let (key, optional) = match &path[0] {
PathMember::String { val, optional, .. } => (val.as_str().into(), *optional),
PathMember::String { val, optional, .. } => (builder.data(val)?, *optional),
PathMember::Int { span, .. } => return Err(CompileError::AccessEnvByInt(*span)),
};
let tail = &path[1..];
@ -603,6 +619,7 @@ fn compile_load_env(
enum CompileError {
RegisterOverflow,
RegisterUninitialized(RegId),
DataOverflow,
InvalidRedirectMode,
Garbage,
UnsupportedOperatorExpression,
@ -617,6 +634,9 @@ impl CompileError {
CompileError::RegisterUninitialized(reg_id) => {
format!("register {reg_id} is uninitialized when used, possibly reused")
}
CompileError::DataOverflow => {
format!("block contains too much string data: maximum 4 GiB exceeded")
}
CompileError::InvalidRedirectMode => {
"invalid redirect mode: File should not be specified by commands".into()
}
@ -639,6 +659,7 @@ impl CompileError {
/// Accumulates the pieces of an [`IrBlock`] while it is being compiled; `finish()` converts the
/// builder into the final block.
struct BlockBuilder {
/// Instructions emitted so far; moved into `IrBlock::instructions` by `finish()`.
instructions: Vec<Instruction>,
/// Spans moved into `IrBlock::spans` by `finish()`.
// NOTE(review): presumably one span per instruction, in the same order — confirm against `push()`.
spans: Vec<Span>,
/// Raw bytes appended by `data()`; every [`DataSlice`] it returns is a `start`/`len` pair into
/// this buffer. Converted into `IrBlock::data` by `finish()`.
data: Vec<u8>,
/// Its length becomes `IrBlock::register_count` in `finish()`.
// NOTE(review): presumably one flag per register, `true` while the register is allocated — confirm.
register_allocation_state: Vec<bool>,
}
@ -648,6 +669,7 @@ impl BlockBuilder {
BlockBuilder {
instructions: vec![],
spans: vec![],
data: vec![],
register_allocation_state: vec![true],
}
}
@ -732,6 +754,12 @@ impl BlockBuilder {
decl_id: _,
src_dst: _,
} => (),
Instruction::ListPush { src_dst: _, item } => self.free_register(*item)?,
Instruction::RecordInsert {
src_dst: _,
key: _,
val,
} => self.free_register(*val)?,
Instruction::BinaryOp {
lhs_dst: _,
op: _,
@ -786,11 +814,27 @@ impl BlockBuilder {
self.load_literal(reg_id, Literal::Nothing.into_spanned(Span::unknown()))
}
/// Add data to the `data` array and return a [`DataSlice`] referencing it.
///
/// The bytes are appended at the current end of the buffer, so the returned slice starts at the
/// buffer's previous length and spans exactly the bytes passed in.
///
/// # Errors
///
/// Returns [`CompileError::DataOverflow`], leaving the buffer untouched, unless the end of the new
/// slice stays strictly below `u32::MAX`. [`DataSlice`] stores its offset and length as `u32`.
fn data(&mut self, data: impl AsRef<[u8]>) -> Result<DataSlice, CompileError> {
    let bytes = data.as_ref();
    // Checked conversions and addition: plain `usize` addition could wrap on 32-bit targets and
    // `as u32` would silently truncate, producing a slice that points at the wrong bytes.
    let start = u32::try_from(self.data.len()).map_err(|_| CompileError::DataOverflow)?;
    let len = u32::try_from(bytes.len()).map_err(|_| CompileError::DataOverflow)?;
    match start.checked_add(len) {
        Some(end) if end < u32::MAX => {
            self.data.extend_from_slice(bytes);
            Ok(DataSlice { start, len })
        }
        _ => Err(CompileError::DataOverflow),
    }
}
/// Consume the builder and produce the final [`IrBlock`].
fn finish(self) -> IrBlock {
IrBlock {
instructions: self.instructions,
spans: self.spans,
data: self.data.into(),
register_count: self.register_allocation_state.len(),
}
}

View File

@ -1,13 +1,13 @@
use std::fs::File;
use std::{fs::File, sync::Arc};
use nu_path::expand_path_with;
use nu_protocol::{
ast::{Bits, Block, Boolean, CellPath, Comparison, Math, Operator},
debugger::DebugContext,
engine::{Argument, Closure, EngineState, Redirection, Stack},
ir::{Call, Instruction, IrBlock, Literal, RedirectMode},
DeclId, IntoPipelineData, IntoSpanned, OutDest, PipelineData, RegId, ShellError, Span, Value,
VarId,
ir::{Call, DataSlice, Instruction, IrBlock, Literal, RedirectMode},
DeclId, IntoPipelineData, IntoSpanned, OutDest, PipelineData, Range, Record, RegId, ShellError,
Span, Value, VarId,
};
use crate::eval::is_automatic_env_var;
@ -31,6 +31,7 @@ pub fn eval_ir_block<D: DebugContext>(
&mut EvalContext {
engine_state,
stack,
data: &ir_block.data,
args_base,
redirect_out: None,
redirect_err: None,
@ -62,6 +63,7 @@ pub fn eval_ir_block<D: DebugContext>(
struct EvalContext<'a> {
engine_state: &'a EngineState,
stack: &'a mut Stack,
data: &'a Arc<[u8]>,
/// Base index on the argument stack to reset to after a call
args_base: usize,
/// State set by redirect-out
@ -89,6 +91,14 @@ impl<'a> EvalContext<'a> {
let span = data.span().unwrap_or(fallback_span);
data.into_value(span)
}
/// Resolve `slice` against the block's data buffer and view the bytes as a string.
///
/// Produces a [`ShellError::IrEvalError`] tagged with `error_span` when the selected bytes are not
/// valid UTF-8.
fn get_str(&self, slice: DataSlice, error_span: Span) -> Result<&'a str, ShellError> {
    let bytes = &self.data[slice];
    match std::str::from_utf8(bytes) {
        Ok(text) => Ok(text),
        Err(_) => Err(ShellError::IrEvalError {
            msg: format!("data slice does not refer to valid UTF-8: {slice:?}"),
            span: Some(error_span),
        }),
    }
}
}
/// Eval an IR block on the provided slice of registers.
@ -108,7 +118,10 @@ fn eval_ir_block_impl<D: DebugContext>(
while pc < ir_block.instructions.len() {
let instruction = &ir_block.instructions[pc];
let span = &ir_block.spans[pc];
log::trace!("{pc:-4}: {}", instruction.display(ctx.engine_state));
log::trace!(
"{pc:-4}: {}",
instruction.display(ctx.engine_state, ctx.data)
);
match eval_instruction(ctx, instruction, span)? {
InstructionResult::Continue => {
pc += 1;
@ -201,17 +214,19 @@ fn eval_instruction(
Ok(Continue)
}
Instruction::LoadEnv { dst, key } => {
let key = ctx.get_str(*key, *span)?;
if let Some(value) = ctx.stack.get_env_var(ctx.engine_state, key) {
ctx.put_reg(*dst, value.into_pipeline_data());
Ok(Continue)
} else {
Err(ShellError::EnvVarNotFoundAtRuntime {
envvar_name: key.clone().into(),
envvar_name: key.into(),
span: *span,
})
}
}
Instruction::LoadEnvOpt { dst, key } => {
let key = ctx.get_str(*key, *span)?;
let value = ctx
.stack
.get_env_var(ctx.engine_state, key)
@ -220,13 +235,14 @@ fn eval_instruction(
Ok(Continue)
}
Instruction::StoreEnv { key, src } => {
let key = ctx.get_str(*key, *span)?;
let value = ctx.collect_reg(*src, *span)?;
if !is_automatic_env_var(key) {
ctx.stack.add_env_var(key.clone().into(), value);
ctx.stack.add_env_var(key.into(), value);
Ok(Continue)
} else {
Err(ShellError::AutomaticEnvVarSetManually {
envvar_name: key.clone().into(),
envvar_name: key.into(),
span: *span,
})
}
@ -247,7 +263,8 @@ fn eval_instruction(
}
Instruction::PushFlag { name } => {
ctx.stack.argument_stack.push(Argument::Flag {
name: name.clone(),
data: ctx.data.clone(),
name: *name,
span: *span,
});
Ok(Continue)
@ -255,7 +272,8 @@ fn eval_instruction(
Instruction::PushNamed { name, src } => {
let val = ctx.collect_reg(*src, *span)?;
ctx.stack.argument_stack.push(Argument::Named {
name: name.clone(),
data: ctx.data.clone(),
name: *name,
span: *span,
val,
});
@ -277,6 +295,27 @@ fn eval_instruction(
ctx.put_reg(*src_dst, result);
Ok(Continue)
}
Instruction::ListPush { src_dst, item } => {
let list_value = ctx.collect_reg(*src_dst, *span)?;
let item = ctx.collect_reg(*item, *span)?;
let list_span = list_value.span();
let mut list = list_value.into_list()?;
list.push(item);
ctx.put_reg(*src_dst, Value::list(list, list_span).into_pipeline_data());
Ok(Continue)
}
Instruction::RecordInsert { src_dst, key, val } => {
let record_value = ctx.collect_reg(*src_dst, *span)?;
let val = ctx.collect_reg(*val, *span)?;
let record_span = record_value.span();
let mut record = record_value.into_record()?;
record.insert(ctx.get_str(*key, *span)?, val);
ctx.put_reg(
*src_dst,
Value::record(record, record_span).into_pipeline_data(),
);
Ok(Continue)
}
Instruction::BinaryOp { lhs_dst, op, rhs } => binary_op(ctx, *lhs_dst, op, *rhs, *span),
Instruction::FollowCellPath { src_dst, path } => {
let data = ctx.take_reg(*src_dst);
@ -378,7 +417,7 @@ fn literal_value(
Literal::Bool(b) => Value::bool(*b, span),
Literal::Int(i) => Value::int(*i, span),
Literal::Float(f) => Value::float(*f, span),
Literal::Binary(bin) => Value::binary(bin.clone(), span),
Literal::Binary(bin) => Value::binary(&ctx.data[*bin], span),
Literal::Block(block_id) => Value::closure(
Closure {
block_id: *block_id,
@ -401,22 +440,30 @@ fn literal_value(
span,
)
}
Literal::List(literals) => {
let mut vec = Vec::with_capacity(literals.len());
for elem in literals.iter() {
vec.push(literal_value(ctx, &elem.item, elem.span)?);
}
Value::list(vec, span)
Literal::Range {
start,
step,
end,
inclusion,
} => {
let start = ctx.collect_reg(*start, span)?;
let step = ctx.collect_reg(*step, span)?;
let end = ctx.collect_reg(*end, span)?;
let range = Range::new(start, step, end, *inclusion, span)?;
Value::range(range, span)
}
Literal::List { capacity } => Value::list(Vec::with_capacity(*capacity), span),
Literal::Record { capacity } => Value::record(Record::with_capacity(*capacity), span),
Literal::Filepath {
val: path,
no_expand,
} => {
let path = ctx.get_str(*path, span)?;
if *no_expand {
Value::string(path.as_ref(), span)
Value::string(path, span)
} else {
let cwd = ctx.engine_state.cwd(Some(ctx.stack))?;
let path = expand_path_with(path.as_ref(), cwd, true);
let path = expand_path_with(path, cwd, true);
Value::string(path.to_string_lossy(), span)
}
@ -425,20 +472,23 @@ fn literal_value(
val: path,
no_expand,
} => {
if path.as_ref() == "-" {
let path = ctx.get_str(*path, span)?;
if path == "-" {
Value::string("-", span)
} else if *no_expand {
Value::string(path.as_ref(), span)
Value::string(path, span)
} else {
let cwd = ctx.engine_state.cwd(Some(ctx.stack)).unwrap_or_default();
let path = expand_path_with(path.as_ref(), cwd, true);
let path = expand_path_with(path, cwd, true);
Value::string(path.to_string_lossy(), span)
}
}
Literal::GlobPattern { val, no_expand } => Value::glob(val.as_ref(), *no_expand, span),
Literal::String(s) => Value::string(s.clone(), span),
Literal::RawString(s) => Value::string(s.clone(), span),
Literal::GlobPattern { val, no_expand } => {
Value::glob(ctx.get_str(*val, span)?, *no_expand, span)
}
Literal::String(s) => Value::string(ctx.get_str(*s, span)?, span),
Literal::RawString(s) => Value::string(ctx.get_str(*s, span)?, span),
Literal::CellPath(path) => Value::cell_path(CellPath::clone(&path), span),
Literal::Nothing => Value::nothing(span),
})

View File

@ -159,17 +159,28 @@ fn ir_call_to_extern_call(
// Add the arguments, reformatting named arguments into string positionals
for index in 0..call.args_len {
match &call.arguments(stack)[index] {
engine::Argument::Flag { name, span } => {
engine::Argument::Flag { data, name, span } => {
let name_arg = engine::Argument::Positional {
span: *span,
val: Value::string(name.as_ref(), *span),
val: Value::string(
std::str::from_utf8(&data[*name]).expect("invalid flag name"),
*span,
),
};
extern_call.add_argument(stack, name_arg);
}
engine::Argument::Named { name, span, val } => {
engine::Argument::Named {
data,
name,
span,
val,
} => {
let name_arg = engine::Argument::Positional {
span: *span,
val: Value::string(name.as_ref(), *span),
val: Value::string(
std::str::from_utf8(&data[*name]).expect("invalid arg name"),
*span,
),
};
let val_arg = engine::Argument::Positional {
span: *span,

View File

@ -1,6 +1,6 @@
use std::sync::Arc;
use crate::{Span, Value};
use crate::{ir::DataSlice, Span, Value};
/// Represents a fully evaluated argument to a call.
#[derive(Debug, Clone)]
@ -16,12 +16,14 @@ pub enum Argument {
},
/// A named argument with no value, e.g. `--flag`
Flag {
name: Arc<str>,
data: Arc<[u8]>,
name: DataSlice,
span: Span,
},
/// A named argument with a value, e.g. `--flag value` or `--flag=`
Named {
name: Arc<str>,
data: Arc<[u8]>,
name: DataSlice,
span: Span,
val: Value,
},

View File

@ -1,8 +1,12 @@
use std::sync::Arc;
use crate::{
engine::{self, Argument, Stack},
DeclId, ShellError, Span, Spanned, Value,
};
use super::DataSlice;
/// Contains the information for a call being made to a declared command.
#[derive(Debug, Clone)]
pub struct Call {
@ -59,24 +63,30 @@ impl Call {
}
pub fn named_iter<'a>(
&self,
&'a self,
stack: &'a Stack,
) -> impl Iterator<Item = (Spanned<&'a str>, Option<&'a Value>)> + 'a {
self.arguments(stack).iter().filter_map(
|arg: &Argument| -> Option<(Spanned<&str>, Option<&Value>)> {
match arg {
Argument::Flag { name, span, .. } => Some((
Argument::Flag {
data, name, span, ..
} => Some((
Spanned {
item: name,
item: std::str::from_utf8(&data[*name]).expect("invalid arg name"),
span: *span,
},
None,
)),
Argument::Named {
name, span, val, ..
data,
name,
span,
val,
..
} => Some((
Spanned {
item: name,
item: std::str::from_utf8(&data[*name]).expect("invalid arg name"),
span: *span,
},
Some(val),
@ -88,8 +98,20 @@ impl Call {
}
pub fn get_named_arg<'a>(&self, stack: &'a Stack, flag_name: &str) -> Option<&'a Value> {
self.named_iter(stack)
.find_map(|(name, val)| (name.item == flag_name).then_some(val))
// Optimized to avoid str::from_utf8()
self.arguments(stack)
.iter()
.find_map(|arg: &Argument| -> Option<Option<&Value>> {
match arg {
Argument::Flag { data, name, .. } if &data[*name] == flag_name.as_bytes() => {
Some(None)
}
Argument::Named {
data, name, val, ..
} if &data[*name] == flag_name.as_bytes() => Some(Some(val)),
_ => None,
}
})
.flatten()
}
@ -192,13 +214,12 @@ impl CallBuilder {
/// Add a flag (no-value named) argument to the [`Stack`] and reference it from the [`Call`].
pub fn add_flag(&mut self, stack: &mut Stack, name: impl AsRef<str>, span: Span) -> &mut Self {
self.add_argument(
stack,
Argument::Flag {
name: name.as_ref().into(),
span,
},
)
let data: Arc<[u8]> = name.as_ref().as_bytes().into();
let name = DataSlice {
start: 0,
len: data.len().try_into().expect("flag name too big"),
};
self.add_argument(stack, Argument::Flag { data, name, span })
}
/// Add a named argument to the [`Stack`] and reference it from the [`Call`].
@ -209,10 +230,16 @@ impl CallBuilder {
span: Span,
val: Value,
) -> &mut Self {
let data: Arc<[u8]> = name.as_ref().as_bytes().into();
let name = DataSlice {
start: 0,
len: data.len().try_into().expect("arg name too big"),
};
self.add_argument(
stack,
Argument::Named {
name: name.as_ref().into(),
data,
name,
span,
val,
},

View File

@ -2,7 +2,7 @@ use std::fmt;
use crate::{engine::EngineState, DeclId, VarId};
use super::{Instruction, IrBlock, RedirectMode};
use super::{DataSlice, Instruction, IrBlock, Literal, RedirectMode};
pub struct FmtIrBlock<'a> {
pub(super) engine_state: &'a EngineState,
@ -14,11 +14,13 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
let plural = |count| if count == 1 { "" } else { "s" };
writeln!(
f,
"# {} register{}, {} instruction{}",
"# {} register{}, {} instruction{}, {} byte{} of data",
self.ir_block.register_count,
plural(self.ir_block.register_count),
self.ir_block.instructions.len(),
plural(self.ir_block.instructions.len()),
self.ir_block.data.len(),
plural(self.ir_block.data.len()),
)?;
for (index, instruction) in self.ir_block.instructions.iter().enumerate() {
writeln!(
@ -27,7 +29,8 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
index,
FmtInstruction {
engine_state: self.engine_state,
instruction
instruction,
data: &self.ir_block.data,
}
)?;
}
@ -38,6 +41,7 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
/// Wrapper whose [`Display`](fmt::Display) implementation prints a single [`Instruction`] as one
/// line of an IR listing.
pub struct FmtInstruction<'a> {
/// Engine state, e.g. handed to `FmtDecl` when printing a `call` instruction.
pub(super) engine_state: &'a EngineState,
/// The instruction to print.
pub(super) instruction: &'a Instruction,
/// Data buffer against which the instruction's `DataSlice` operands (keys, names, literal
/// contents) are resolved when printing.
pub(super) data: &'a [u8],
}
impl<'a> fmt::Display for FmtInstruction<'a> {
@ -46,7 +50,11 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
match self.instruction {
Instruction::LoadLiteral { dst, lit } => {
write!(f, "{:WIDTH$} {dst}, {lit:?}", "load-literal")
let lit = FmtLiteral {
literal: lit,
data: self.data,
};
write!(f, "{:WIDTH$} {dst}, {lit}", "load-literal")
}
Instruction::Move { dst, src } => {
write!(f, "{:WIDTH$} {dst}, {src}", "move")
@ -69,13 +77,16 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
write!(f, "{:WIDTH$} {var}, {src}", "store-variable")
}
Instruction::LoadEnv { dst, key } => {
write!(f, "{:WIDTH$} {dst}, {key:?}", "load-env")
let key = FmtData(self.data, *key);
write!(f, "{:WIDTH$} {dst}, {key}", "load-env")
}
Instruction::LoadEnvOpt { dst, key } => {
write!(f, "{:WIDTH$} {dst}, {key:?}", "load-env-opt")
let key = FmtData(self.data, *key);
write!(f, "{:WIDTH$} {dst}, {key}", "load-env-opt")
}
Instruction::StoreEnv { key, src } => {
write!(f, "{:WIDTH$} {key:?}, {src}", "store-env")
let key = FmtData(self.data, *key);
write!(f, "{:WIDTH$} {key}, {src}", "store-env")
}
Instruction::PushPositional { src } => {
write!(f, "{:WIDTH$} {src}", "push-positional")
@ -84,10 +95,12 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
write!(f, "{:WIDTH$} {src}", "append-rest")
}
Instruction::PushFlag { name } => {
write!(f, "{:WIDTH$} {name:?}", "push-flag")
let name = FmtData(self.data, *name);
write!(f, "{:WIDTH$} {name}", "push-flag")
}
Instruction::PushNamed { name, src } => {
write!(f, "{:WIDTH$} {name:?}, {src}", "push-named")
let name = FmtData(self.data, *name);
write!(f, "{:WIDTH$} {name}, {src}", "push-named")
}
Instruction::RedirectOut { mode } => {
write!(f, "{:WIDTH$} {mode}", "redirect-out")
@ -99,6 +112,13 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
let decl = FmtDecl::new(self.engine_state, *decl_id);
write!(f, "{:WIDTH$} {decl}, {src_dst}", "call")
}
Instruction::ListPush { src_dst, item } => {
write!(f, "{:WIDTH$} {src_dst}, {item}", "list-push")
}
Instruction::RecordInsert { src_dst, key, val } => {
let key = FmtData(self.data, *key);
write!(f, "{:WIDTH$} {src_dst}, {key}, {val}", "record-insert")
}
Instruction::BinaryOp { lhs_dst, op, rhs } => {
write!(f, "{:WIDTH$} {lhs_dst}, {op:?}, {rhs}", "binary-op")
}
@ -170,7 +190,7 @@ impl fmt::Display for FmtVar<'_> {
}
}
impl std::fmt::Display for RedirectMode {
impl fmt::Display for RedirectMode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
RedirectMode::Pipe => write!(f, "pipe"),
@ -181,3 +201,62 @@ impl std::fmt::Display for RedirectMode {
}
}
}
/// Display adapter for the bytes that a [`DataSlice`] selects from a data buffer.
///
/// Field `0` is the whole buffer and field `1` is the slice within it. Only the selected bytes are
/// ever printed.
struct FmtData<'a>(&'a [u8], DataSlice);

impl<'a> fmt::Display for FmtData<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Resolve the slice once so both branches print exactly the same bytes.
        let bytes = &self.0[self.1];
        if let Ok(s) = std::str::from_utf8(bytes) {
            // Write as string
            write!(f, "{s:?}")
        } else {
            // Write as byte array. Only the selected bytes, not the entire buffer.
            write!(f, "0x{bytes:x?}")
        }
    }
}
struct FmtLiteral<'a> {
literal: &'a Literal,
data: &'a [u8],
}
impl<'a> fmt::Display for FmtLiteral<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.literal {
Literal::Bool(b) => write!(f, "bool({b:?})"),
Literal::Int(i) => write!(f, "int({i:?})"),
Literal::Float(fl) => write!(f, "float({fl:?})"),
Literal::Binary(b) => write!(f, "binary({})", FmtData(self.data, *b)),
Literal::Block(id) => write!(f, "block({id})"),
Literal::Closure(id) => write!(f, "closure({id})"),
Literal::Range {
start,
step,
end,
inclusion,
} => write!(f, "range({start}, {step}, {end}, {inclusion:?})"),
Literal::List { capacity } => write!(f, "list(capacity = {capacity})"),
Literal::Record { capacity } => write!(f, "record(capacity = {capacity})"),
Literal::Filepath { val, no_expand } => write!(
f,
"filepath({}, no_expand = {no_expand:?})",
FmtData(self.data, *val)
),
Literal::Directory { val, no_expand } => write!(
f,
"directory({}, no_expand = {no_expand:?})",
FmtData(self.data, *val)
),
Literal::GlobPattern { val, no_expand } => write!(
f,
"glob-pattern({}, no_expand = {no_expand:?})",
FmtData(self.data, *val)
),
Literal::String(s) => write!(f, "string({})", FmtData(self.data, *s)),
Literal::RawString(rs) => write!(f, "raw-string({})", FmtData(self.data, *rs)),
Literal::CellPath(p) => write!(f, "cell-path({p})"),
Literal::Nothing => write!(f, "nothing"),
}
}
}

View File

@ -1,9 +1,9 @@
use std::sync::Arc;
use crate::{
ast::{CellPath, Operator},
ast::{CellPath, Operator, RangeInclusion},
engine::EngineState,
BlockId, DeclId, RegId, Span, Spanned, VarId,
BlockId, DeclId, RegId, Span, VarId,
};
use serde::{Deserialize, Serialize};
@ -18,6 +18,8 @@ pub use display::{FmtInstruction, FmtIrBlock};
/// A compiled block of IR: its instructions plus the side tables they refer to.
pub struct IrBlock {
/// The instructions of the block; the evaluator steps through them by index.
pub instructions: Vec<Instruction>,
/// Spans looked up with the same index as `instructions` (the evaluator reads `spans[pc]` for
/// `instructions[pc]`).
pub spans: Vec<Span>,
/// Byte buffer that every [`DataSlice`] in this block's instructions and literals indexes into.
/// (De)serialized through the `serde_arc_u8_array` helper module.
#[serde(with = "serde_arc_u8_array")]
pub data: Arc<[u8]>,
/// Register count recorded by the compiler when the block was finished.
// NOTE(review): presumably the number of registers the evaluator must provide — confirm.
pub register_count: usize,
}
@ -32,6 +34,22 @@ impl IrBlock {
}
}
/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
/// string data that a block uses.
///
/// Indexing a `[u8]` with a `DataSlice` yields the bytes `start..start + len`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct DataSlice {
/// Byte offset into the data array at which the slice begins.
pub start: u32,
/// Length of the slice in bytes.
pub len: u32,
}
/// Lets a byte buffer be indexed directly with a [`DataSlice`], yielding the `len` bytes that
/// begin at offset `start`. Panics, like any slice indexing, if that range is out of bounds.
impl std::ops::Index<DataSlice> for [u8] {
    type Output = [u8];

    fn index(&self, index: DataSlice) -> &Self::Output {
        let begin = index.start as usize;
        let end = begin + index.len as usize;
        &self[begin..end]
    }
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Instruction {
/// Load a literal value into the `dst` register
@ -49,27 +67,20 @@ pub enum Instruction {
/// Store the value of a variable from the `src` register
StoreVariable { var_id: VarId, src: RegId },
/// Load the value of an environment variable into the `dst` register
LoadEnv { dst: RegId, key: Box<str> },
LoadEnv { dst: RegId, key: DataSlice },
/// Load the value of an environment variable into the `dst` register, or `Nothing` if it
/// doesn't exist
LoadEnvOpt { dst: RegId, key: Box<str> },
LoadEnvOpt { dst: RegId, key: DataSlice },
/// Store the value of an environment variable from the `src` register
StoreEnv { key: Box<str>, src: RegId },
StoreEnv { key: DataSlice, src: RegId },
/// Add a positional arg to the next call
PushPositional { src: RegId },
/// Add a list of args to the next call (spread/rest)
AppendRest { src: RegId },
/// Add a named arg with no value to the next call.
PushFlag {
#[serde(with = "serde_arc_str")]
name: Arc<str>,
},
PushFlag { name: DataSlice },
/// Add a named arg with a value to the next call.
PushNamed {
#[serde(with = "serde_arc_str")]
name: Arc<str>,
src: RegId,
},
PushNamed { name: DataSlice, src: RegId },
/// Set the redirection for stdout for the next call (only)
RedirectOut { mode: RedirectMode },
/// Set the redirection for stderr for the next call (only)
@ -77,6 +88,14 @@ pub enum Instruction {
/// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
/// overwriting it. The argument stack is used implicitly and cleared when the call ends.
Call { decl_id: DeclId, src_dst: RegId },
/// Push a value onto the end of a list. Used to construct list literals.
ListPush { src_dst: RegId, item: RegId },
/// Insert a key-value pair into a record. Any existing value for the key is overwritten.
RecordInsert {
src_dst: RegId,
key: DataSlice,
val: RegId,
},
/// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
/// `lhs_dst`.
BinaryOp {
@ -96,7 +115,6 @@ pub enum Instruction {
path: RegId,
new_value: RegId,
},
/// Update a cell path
/// Jump to an offset in this block
Jump { index: usize },
/// Branch to an offset in this block if the value of the `cond` register is a true boolean,
@ -109,10 +127,15 @@ pub enum Instruction {
impl Instruction {
/// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
/// listing of the instruction.
pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtInstruction<'a> {
pub fn display<'a>(
&'a self,
engine_state: &'a EngineState,
data: &'a [u8],
) -> FmtInstruction<'a> {
FmtInstruction {
engine_state,
instruction: self,
data,
}
}
}
@ -128,15 +151,35 @@ pub enum Literal {
Bool(bool),
Int(i64),
Float(f64),
Binary(Box<[u8]>),
Binary(DataSlice),
Block(BlockId),
Closure(BlockId),
List(Box<[Spanned<Literal>]>),
Filepath { val: Box<str>, no_expand: bool },
Directory { val: Box<str>, no_expand: bool },
GlobPattern { val: Box<str>, no_expand: bool },
String(Box<str>),
RawString(Box<str>),
Range {
start: RegId,
step: RegId,
end: RegId,
inclusion: RangeInclusion,
},
List {
capacity: usize,
},
Record {
capacity: usize,
},
Filepath {
val: DataSlice,
no_expand: bool,
},
Directory {
val: DataSlice,
no_expand: bool,
},
GlobPattern {
val: DataSlice,
no_expand: bool,
},
String(DataSlice),
RawString(DataSlice),
CellPath(Box<CellPath>),
Nothing,
}
@ -163,23 +206,23 @@ pub enum RedirectMode {
},
}
/// Just a hack to allow `Arc<str>` to be serialized and deserialized
mod serde_arc_str {
/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized
mod serde_arc_u8_array {
use serde::{Deserialize, Serialize};
use std::sync::Arc;
pub fn serialize<S>(string: &Arc<str>, ser: S) -> Result<S::Ok, S::Error>
pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
string.as_ref().serialize(ser)
data.as_ref().serialize(ser)
}
pub fn deserialize<'de, D>(de: D) -> Result<Arc<str>, D::Error>
pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
where
D: serde::Deserializer<'de>,
{
let string: &'de str = Deserialize::deserialize(de)?;
Ok(string.into())
let data: &'de [u8] = Deserialize::deserialize(de)?;
Ok(data.into())
}
}