more efficient storage for string data in ir blocks
This commit is contained in:
parent
4fe74e3f8c
commit
7890b3f27a
|
@ -1,12 +1,10 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use nu_protocol::{
|
||||
ast::{
|
||||
Argument, Block, Call, CellPath, Expr, Expression, Operator, PathMember, Pipeline,
|
||||
PipelineRedirection, RedirectionSource, RedirectionTarget,
|
||||
},
|
||||
engine::StateWorkingSet,
|
||||
ir::{Instruction, IrBlock, Literal, RedirectMode},
|
||||
ir::{DataSlice, Instruction, IrBlock, Literal, RedirectMode},
|
||||
IntoSpanned, OutDest, RegId, ShellError, Span, Spanned, ENV_VARIABLE_ID,
|
||||
};
|
||||
|
||||
|
@ -264,7 +262,10 @@ fn compile_expression(
|
|||
Expr::Bool(b) => lit(builder, Literal::Bool(*b)),
|
||||
Expr::Int(i) => lit(builder, Literal::Int(*i)),
|
||||
Expr::Float(f) => lit(builder, Literal::Float(*f)),
|
||||
Expr::Binary(bin) => lit(builder, Literal::Binary(bin.as_slice().into())),
|
||||
Expr::Binary(bin) => {
|
||||
let data_slice = builder.data(bin)?;
|
||||
lit(builder, Literal::Binary(data_slice))
|
||||
}
|
||||
Expr::Range(_) => Err(CompileError::Todo("Range")),
|
||||
Expr::Var(var_id) => builder.push(
|
||||
Instruction::LoadVariable {
|
||||
|
@ -333,29 +334,44 @@ fn compile_expression(
|
|||
Expr::Keyword(_) => Err(CompileError::Todo("Keyword")),
|
||||
Expr::ValueWithUnit(_) => Err(CompileError::Todo("ValueWithUnit")),
|
||||
Expr::DateTime(_) => Err(CompileError::Todo("DateTime")),
|
||||
Expr::Filepath(path, no_expand) => lit(
|
||||
builder,
|
||||
Literal::Filepath {
|
||||
val: path.as_str().into(),
|
||||
no_expand: *no_expand,
|
||||
},
|
||||
),
|
||||
Expr::Directory(path, no_expand) => lit(
|
||||
builder,
|
||||
Literal::Directory {
|
||||
val: path.as_str().into(),
|
||||
no_expand: *no_expand,
|
||||
},
|
||||
),
|
||||
Expr::GlobPattern(path, no_expand) => lit(
|
||||
builder,
|
||||
Literal::GlobPattern {
|
||||
val: path.as_str().into(),
|
||||
no_expand: *no_expand,
|
||||
},
|
||||
),
|
||||
Expr::String(s) => lit(builder, Literal::String(s.as_str().into())),
|
||||
Expr::RawString(rs) => lit(builder, Literal::RawString(rs.as_str().into())),
|
||||
Expr::Filepath(path, no_expand) => {
|
||||
let val = builder.data(path)?;
|
||||
lit(
|
||||
builder,
|
||||
Literal::Filepath {
|
||||
val,
|
||||
no_expand: *no_expand,
|
||||
},
|
||||
)
|
||||
}
|
||||
Expr::Directory(path, no_expand) => {
|
||||
let val = builder.data(path)?;
|
||||
lit(
|
||||
builder,
|
||||
Literal::Directory {
|
||||
val,
|
||||
no_expand: *no_expand,
|
||||
},
|
||||
)
|
||||
}
|
||||
Expr::GlobPattern(path, no_expand) => {
|
||||
let val = builder.data(path)?;
|
||||
lit(
|
||||
builder,
|
||||
Literal::GlobPattern {
|
||||
val,
|
||||
no_expand: *no_expand,
|
||||
},
|
||||
)
|
||||
}
|
||||
Expr::String(s) => {
|
||||
let data_slice = builder.data(s)?;
|
||||
lit(builder, Literal::String(data_slice))
|
||||
}
|
||||
Expr::RawString(rs) => {
|
||||
let data_slice = builder.data(rs)?;
|
||||
lit(builder, Literal::RawString(data_slice))
|
||||
}
|
||||
Expr::CellPath(path) => lit(builder, Literal::CellPath(Box::new(path.clone()))),
|
||||
Expr::FullCellPath(full_cell_path) => {
|
||||
if matches!(full_cell_path.head.expr, Expr::Var(ENV_VARIABLE_ID)) {
|
||||
|
@ -411,9 +427,9 @@ fn compile_call(
|
|||
// We could technically compile anything that isn't another call safely without worrying about
|
||||
// the argument state, but we'd have to check all of that first and it just isn't really worth
|
||||
// it.
|
||||
enum CompiledArg {
|
||||
enum CompiledArg<'a> {
|
||||
Positional(RegId, Span),
|
||||
Named(Arc<str>, Option<RegId>, Span),
|
||||
Named(&'a str, Option<RegId>, Span),
|
||||
Spread(RegId, Span),
|
||||
}
|
||||
|
||||
|
@ -443,11 +459,9 @@ fn compile_call(
|
|||
arg_reg.expect("expr() None in non-Named"),
|
||||
arg.span(),
|
||||
)),
|
||||
Argument::Named((name, _, _)) => compiled_args.push(CompiledArg::Named(
|
||||
name.item.as_str().into(),
|
||||
arg_reg,
|
||||
arg.span(),
|
||||
)),
|
||||
Argument::Named((name, _, _)) => {
|
||||
compiled_args.push(CompiledArg::Named(name.item.as_str(), arg_reg, arg.span()))
|
||||
}
|
||||
Argument::Unknown(_) => return Err(CompileError::Garbage),
|
||||
Argument::Spread(_) => compiled_args.push(CompiledArg::Spread(
|
||||
arg_reg.expect("expr() None in non-Named"),
|
||||
|
@ -463,9 +477,11 @@ fn compile_call(
|
|||
builder.push(Instruction::PushPositional { src: reg }.into_spanned(span))?
|
||||
}
|
||||
CompiledArg::Named(name, Some(reg), span) => {
|
||||
let name = builder.data(name)?;
|
||||
builder.push(Instruction::PushNamed { name, src: reg }.into_spanned(span))?
|
||||
}
|
||||
CompiledArg::Named(name, None, span) => {
|
||||
let name = builder.data(name)?;
|
||||
builder.push(Instruction::PushFlag { name }.into_spanned(span))?
|
||||
}
|
||||
CompiledArg::Spread(reg, span) => {
|
||||
|
@ -566,7 +582,7 @@ fn compile_load_env(
|
|||
)
|
||||
} else {
|
||||
let (key, optional) = match &path[0] {
|
||||
PathMember::String { val, optional, .. } => (val.as_str().into(), *optional),
|
||||
PathMember::String { val, optional, .. } => (builder.data(val)?, *optional),
|
||||
PathMember::Int { span, .. } => return Err(CompileError::AccessEnvByInt(*span)),
|
||||
};
|
||||
let tail = &path[1..];
|
||||
|
@ -603,6 +619,7 @@ fn compile_load_env(
|
|||
enum CompileError {
|
||||
RegisterOverflow,
|
||||
RegisterUninitialized(RegId),
|
||||
DataOverflow,
|
||||
InvalidRedirectMode,
|
||||
Garbage,
|
||||
UnsupportedOperatorExpression,
|
||||
|
@ -617,6 +634,9 @@ impl CompileError {
|
|||
CompileError::RegisterUninitialized(reg_id) => {
|
||||
format!("register {reg_id} is uninitialized when used, possibly reused")
|
||||
}
|
||||
CompileError::DataOverflow => {
|
||||
format!("block contains too much string data: maximum 4 GiB exceeded")
|
||||
}
|
||||
CompileError::InvalidRedirectMode => {
|
||||
"invalid redirect mode: File should not be specified by commands".into()
|
||||
}
|
||||
|
@ -639,6 +659,7 @@ impl CompileError {
|
|||
/// Accumulates the pieces of an IR block while it is being compiled.
struct BlockBuilder {
|
||||
/// Instructions emitted so far.
instructions: Vec<Instruction>,
|
||||
/// Spans collected alongside `instructions` (presumably one per instruction — confirm).
spans: Vec<Span>,
|
||||
/// Raw bytes appended by `data()`; handed over as the `data` array of the finished `IrBlock`.
data: Vec<u8>,
|
||||
/// Per-register bookkeeping; its length becomes the finished block's `register_count`.
register_allocation_state: Vec<bool>,
|
||||
}
|
||||
|
||||
|
@ -648,6 +669,7 @@ impl BlockBuilder {
|
|||
BlockBuilder {
|
||||
instructions: vec![],
|
||||
spans: vec![],
|
||||
data: vec![],
|
||||
register_allocation_state: vec![true],
|
||||
}
|
||||
}
|
||||
|
@ -732,6 +754,12 @@ impl BlockBuilder {
|
|||
decl_id: _,
|
||||
src_dst: _,
|
||||
} => (),
|
||||
Instruction::ListPush { src_dst: _, item } => self.free_register(*item)?,
|
||||
Instruction::RecordInsert {
|
||||
src_dst: _,
|
||||
key: _,
|
||||
val,
|
||||
} => self.free_register(*val)?,
|
||||
Instruction::BinaryOp {
|
||||
lhs_dst: _,
|
||||
op: _,
|
||||
|
@ -786,11 +814,27 @@ impl BlockBuilder {
|
|||
self.load_literal(reg_id, Literal::Nothing.into_spanned(Span::unknown()))
|
||||
}
|
||||
|
||||
/// Add data to the `data` array and return a [`DataSlice`] referencing it.
|
||||
fn data(&mut self, data: impl AsRef<[u8]>) -> Result<DataSlice, CompileError> {
|
||||
let start = self.data.len();
|
||||
if start + data.as_ref().len() < u32::MAX as usize {
|
||||
let slice = DataSlice {
|
||||
start: start as u32,
|
||||
len: data.as_ref().len() as u32,
|
||||
};
|
||||
self.data.extend_from_slice(data.as_ref());
|
||||
Ok(slice)
|
||||
} else {
|
||||
Err(CompileError::DataOverflow)
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume the builder and produce the final [`IrBlock`].
|
||||
fn finish(self) -> IrBlock {
|
||||
IrBlock {
|
||||
instructions: self.instructions,
|
||||
spans: self.spans,
|
||||
data: self.data.into(),
|
||||
register_count: self.register_allocation_state.len(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
use std::fs::File;
|
||||
use std::{fs::File, sync::Arc};
|
||||
|
||||
use nu_path::expand_path_with;
|
||||
use nu_protocol::{
|
||||
ast::{Bits, Block, Boolean, CellPath, Comparison, Math, Operator},
|
||||
debugger::DebugContext,
|
||||
engine::{Argument, Closure, EngineState, Redirection, Stack},
|
||||
ir::{Call, Instruction, IrBlock, Literal, RedirectMode},
|
||||
DeclId, IntoPipelineData, IntoSpanned, OutDest, PipelineData, RegId, ShellError, Span, Value,
|
||||
VarId,
|
||||
ir::{Call, DataSlice, Instruction, IrBlock, Literal, RedirectMode},
|
||||
DeclId, IntoPipelineData, IntoSpanned, OutDest, PipelineData, Range, Record, RegId, ShellError,
|
||||
Span, Value, VarId,
|
||||
};
|
||||
|
||||
use crate::eval::is_automatic_env_var;
|
||||
|
@ -31,6 +31,7 @@ pub fn eval_ir_block<D: DebugContext>(
|
|||
&mut EvalContext {
|
||||
engine_state,
|
||||
stack,
|
||||
data: &ir_block.data,
|
||||
args_base,
|
||||
redirect_out: None,
|
||||
redirect_err: None,
|
||||
|
@ -62,6 +63,7 @@ pub fn eval_ir_block<D: DebugContext>(
|
|||
struct EvalContext<'a> {
|
||||
engine_state: &'a EngineState,
|
||||
stack: &'a mut Stack,
|
||||
data: &'a Arc<[u8]>,
|
||||
/// Base index on the argument stack to reset to after a call
|
||||
args_base: usize,
|
||||
/// State set by redirect-out
|
||||
|
@ -89,6 +91,14 @@ impl<'a> EvalContext<'a> {
|
|||
let span = data.span().unwrap_or(fallback_span);
|
||||
data.into_value(span)
|
||||
}
|
||||
|
||||
/// Get a string from data or produce evaluation error if it's invalid UTF-8
|
||||
fn get_str(&self, slice: DataSlice, error_span: Span) -> Result<&'a str, ShellError> {
|
||||
std::str::from_utf8(&self.data[slice]).map_err(|_| ShellError::IrEvalError {
|
||||
msg: format!("data slice does not refer to valid UTF-8: {slice:?}"),
|
||||
span: Some(error_span),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Eval an IR block on the provided slice of registers.
|
||||
|
@ -108,7 +118,10 @@ fn eval_ir_block_impl<D: DebugContext>(
|
|||
while pc < ir_block.instructions.len() {
|
||||
let instruction = &ir_block.instructions[pc];
|
||||
let span = &ir_block.spans[pc];
|
||||
log::trace!("{pc:-4}: {}", instruction.display(ctx.engine_state));
|
||||
log::trace!(
|
||||
"{pc:-4}: {}",
|
||||
instruction.display(ctx.engine_state, ctx.data)
|
||||
);
|
||||
match eval_instruction(ctx, instruction, span)? {
|
||||
InstructionResult::Continue => {
|
||||
pc += 1;
|
||||
|
@ -201,17 +214,19 @@ fn eval_instruction(
|
|||
Ok(Continue)
|
||||
}
|
||||
Instruction::LoadEnv { dst, key } => {
|
||||
let key = ctx.get_str(*key, *span)?;
|
||||
if let Some(value) = ctx.stack.get_env_var(ctx.engine_state, key) {
|
||||
ctx.put_reg(*dst, value.into_pipeline_data());
|
||||
Ok(Continue)
|
||||
} else {
|
||||
Err(ShellError::EnvVarNotFoundAtRuntime {
|
||||
envvar_name: key.clone().into(),
|
||||
envvar_name: key.into(),
|
||||
span: *span,
|
||||
})
|
||||
}
|
||||
}
|
||||
Instruction::LoadEnvOpt { dst, key } => {
|
||||
let key = ctx.get_str(*key, *span)?;
|
||||
let value = ctx
|
||||
.stack
|
||||
.get_env_var(ctx.engine_state, key)
|
||||
|
@ -220,13 +235,14 @@ fn eval_instruction(
|
|||
Ok(Continue)
|
||||
}
|
||||
Instruction::StoreEnv { key, src } => {
|
||||
let key = ctx.get_str(*key, *span)?;
|
||||
let value = ctx.collect_reg(*src, *span)?;
|
||||
if !is_automatic_env_var(key) {
|
||||
ctx.stack.add_env_var(key.clone().into(), value);
|
||||
ctx.stack.add_env_var(key.into(), value);
|
||||
Ok(Continue)
|
||||
} else {
|
||||
Err(ShellError::AutomaticEnvVarSetManually {
|
||||
envvar_name: key.clone().into(),
|
||||
envvar_name: key.into(),
|
||||
span: *span,
|
||||
})
|
||||
}
|
||||
|
@ -247,7 +263,8 @@ fn eval_instruction(
|
|||
}
|
||||
Instruction::PushFlag { name } => {
|
||||
ctx.stack.argument_stack.push(Argument::Flag {
|
||||
name: name.clone(),
|
||||
data: ctx.data.clone(),
|
||||
name: *name,
|
||||
span: *span,
|
||||
});
|
||||
Ok(Continue)
|
||||
|
@ -255,7 +272,8 @@ fn eval_instruction(
|
|||
Instruction::PushNamed { name, src } => {
|
||||
let val = ctx.collect_reg(*src, *span)?;
|
||||
ctx.stack.argument_stack.push(Argument::Named {
|
||||
name: name.clone(),
|
||||
data: ctx.data.clone(),
|
||||
name: *name,
|
||||
span: *span,
|
||||
val,
|
||||
});
|
||||
|
@ -277,6 +295,27 @@ fn eval_instruction(
|
|||
ctx.put_reg(*src_dst, result);
|
||||
Ok(Continue)
|
||||
}
|
||||
Instruction::ListPush { src_dst, item } => {
|
||||
let list_value = ctx.collect_reg(*src_dst, *span)?;
|
||||
let item = ctx.collect_reg(*item, *span)?;
|
||||
let list_span = list_value.span();
|
||||
let mut list = list_value.into_list()?;
|
||||
list.push(item);
|
||||
ctx.put_reg(*src_dst, Value::list(list, list_span).into_pipeline_data());
|
||||
Ok(Continue)
|
||||
}
|
||||
Instruction::RecordInsert { src_dst, key, val } => {
|
||||
let record_value = ctx.collect_reg(*src_dst, *span)?;
|
||||
let val = ctx.collect_reg(*val, *span)?;
|
||||
let record_span = record_value.span();
|
||||
let mut record = record_value.into_record()?;
|
||||
record.insert(ctx.get_str(*key, *span)?, val);
|
||||
ctx.put_reg(
|
||||
*src_dst,
|
||||
Value::record(record, record_span).into_pipeline_data(),
|
||||
);
|
||||
Ok(Continue)
|
||||
}
|
||||
Instruction::BinaryOp { lhs_dst, op, rhs } => binary_op(ctx, *lhs_dst, op, *rhs, *span),
|
||||
Instruction::FollowCellPath { src_dst, path } => {
|
||||
let data = ctx.take_reg(*src_dst);
|
||||
|
@ -378,7 +417,7 @@ fn literal_value(
|
|||
Literal::Bool(b) => Value::bool(*b, span),
|
||||
Literal::Int(i) => Value::int(*i, span),
|
||||
Literal::Float(f) => Value::float(*f, span),
|
||||
Literal::Binary(bin) => Value::binary(bin.clone(), span),
|
||||
Literal::Binary(bin) => Value::binary(&ctx.data[*bin], span),
|
||||
Literal::Block(block_id) => Value::closure(
|
||||
Closure {
|
||||
block_id: *block_id,
|
||||
|
@ -401,22 +440,30 @@ fn literal_value(
|
|||
span,
|
||||
)
|
||||
}
|
||||
Literal::List(literals) => {
|
||||
let mut vec = Vec::with_capacity(literals.len());
|
||||
for elem in literals.iter() {
|
||||
vec.push(literal_value(ctx, &elem.item, elem.span)?);
|
||||
}
|
||||
Value::list(vec, span)
|
||||
Literal::Range {
|
||||
start,
|
||||
step,
|
||||
end,
|
||||
inclusion,
|
||||
} => {
|
||||
let start = ctx.collect_reg(*start, span)?;
|
||||
let step = ctx.collect_reg(*step, span)?;
|
||||
let end = ctx.collect_reg(*end, span)?;
|
||||
let range = Range::new(start, step, end, *inclusion, span)?;
|
||||
Value::range(range, span)
|
||||
}
|
||||
Literal::List { capacity } => Value::list(Vec::with_capacity(*capacity), span),
|
||||
Literal::Record { capacity } => Value::record(Record::with_capacity(*capacity), span),
|
||||
Literal::Filepath {
|
||||
val: path,
|
||||
no_expand,
|
||||
} => {
|
||||
let path = ctx.get_str(*path, span)?;
|
||||
if *no_expand {
|
||||
Value::string(path.as_ref(), span)
|
||||
Value::string(path, span)
|
||||
} else {
|
||||
let cwd = ctx.engine_state.cwd(Some(ctx.stack))?;
|
||||
let path = expand_path_with(path.as_ref(), cwd, true);
|
||||
let path = expand_path_with(path, cwd, true);
|
||||
|
||||
Value::string(path.to_string_lossy(), span)
|
||||
}
|
||||
|
@ -425,20 +472,23 @@ fn literal_value(
|
|||
val: path,
|
||||
no_expand,
|
||||
} => {
|
||||
if path.as_ref() == "-" {
|
||||
let path = ctx.get_str(*path, span)?;
|
||||
if path == "-" {
|
||||
Value::string("-", span)
|
||||
} else if *no_expand {
|
||||
Value::string(path.as_ref(), span)
|
||||
Value::string(path, span)
|
||||
} else {
|
||||
let cwd = ctx.engine_state.cwd(Some(ctx.stack)).unwrap_or_default();
|
||||
let path = expand_path_with(path.as_ref(), cwd, true);
|
||||
let path = expand_path_with(path, cwd, true);
|
||||
|
||||
Value::string(path.to_string_lossy(), span)
|
||||
}
|
||||
}
|
||||
Literal::GlobPattern { val, no_expand } => Value::glob(val.as_ref(), *no_expand, span),
|
||||
Literal::String(s) => Value::string(s.clone(), span),
|
||||
Literal::RawString(s) => Value::string(s.clone(), span),
|
||||
Literal::GlobPattern { val, no_expand } => {
|
||||
Value::glob(ctx.get_str(*val, span)?, *no_expand, span)
|
||||
}
|
||||
Literal::String(s) => Value::string(ctx.get_str(*s, span)?, span),
|
||||
Literal::RawString(s) => Value::string(ctx.get_str(*s, span)?, span),
|
||||
Literal::CellPath(path) => Value::cell_path(CellPath::clone(&path), span),
|
||||
Literal::Nothing => Value::nothing(span),
|
||||
})
|
||||
|
|
|
@ -159,17 +159,28 @@ fn ir_call_to_extern_call(
|
|||
// Add the arguments, reformatting named arguments into string positionals
|
||||
for index in 0..call.args_len {
|
||||
match &call.arguments(stack)[index] {
|
||||
engine::Argument::Flag { name, span } => {
|
||||
engine::Argument::Flag { data, name, span } => {
|
||||
let name_arg = engine::Argument::Positional {
|
||||
span: *span,
|
||||
val: Value::string(name.as_ref(), *span),
|
||||
val: Value::string(
|
||||
std::str::from_utf8(&data[*name]).expect("invalid flag name"),
|
||||
*span,
|
||||
),
|
||||
};
|
||||
extern_call.add_argument(stack, name_arg);
|
||||
}
|
||||
engine::Argument::Named { name, span, val } => {
|
||||
engine::Argument::Named {
|
||||
data,
|
||||
name,
|
||||
span,
|
||||
val,
|
||||
} => {
|
||||
let name_arg = engine::Argument::Positional {
|
||||
span: *span,
|
||||
val: Value::string(name.as_ref(), *span),
|
||||
val: Value::string(
|
||||
std::str::from_utf8(&data[*name]).expect("invalid arg name"),
|
||||
*span,
|
||||
),
|
||||
};
|
||||
let val_arg = engine::Argument::Positional {
|
||||
span: *span,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::{Span, Value};
|
||||
use crate::{ir::DataSlice, Span, Value};
|
||||
|
||||
/// Represents a fully evaluated argument to a call.
|
||||
#[derive(Debug, Clone)]
|
||||
|
@ -16,12 +16,14 @@ pub enum Argument {
|
|||
},
|
||||
/// A named argument with no value, e.g. `--flag`
|
||||
Flag {
|
||||
name: Arc<str>,
|
||||
data: Arc<[u8]>,
|
||||
name: DataSlice,
|
||||
span: Span,
|
||||
},
|
||||
/// A named argument with a value, e.g. `--flag value` or `--flag=`
|
||||
Named {
|
||||
name: Arc<str>,
|
||||
data: Arc<[u8]>,
|
||||
name: DataSlice,
|
||||
span: Span,
|
||||
val: Value,
|
||||
},
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
engine::{self, Argument, Stack},
|
||||
DeclId, ShellError, Span, Spanned, Value,
|
||||
};
|
||||
|
||||
use super::DataSlice;
|
||||
|
||||
/// Contains the information for a call being made to a declared command.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Call {
|
||||
|
@ -59,24 +63,30 @@ impl Call {
|
|||
}
|
||||
|
||||
pub fn named_iter<'a>(
|
||||
&self,
|
||||
&'a self,
|
||||
stack: &'a Stack,
|
||||
) -> impl Iterator<Item = (Spanned<&'a str>, Option<&'a Value>)> + 'a {
|
||||
self.arguments(stack).iter().filter_map(
|
||||
|arg: &Argument| -> Option<(Spanned<&str>, Option<&Value>)> {
|
||||
match arg {
|
||||
Argument::Flag { name, span, .. } => Some((
|
||||
Argument::Flag {
|
||||
data, name, span, ..
|
||||
} => Some((
|
||||
Spanned {
|
||||
item: name,
|
||||
item: std::str::from_utf8(&data[*name]).expect("invalid arg name"),
|
||||
span: *span,
|
||||
},
|
||||
None,
|
||||
)),
|
||||
Argument::Named {
|
||||
name, span, val, ..
|
||||
data,
|
||||
name,
|
||||
span,
|
||||
val,
|
||||
..
|
||||
} => Some((
|
||||
Spanned {
|
||||
item: name,
|
||||
item: std::str::from_utf8(&data[*name]).expect("invalid arg name"),
|
||||
span: *span,
|
||||
},
|
||||
Some(val),
|
||||
|
@ -88,8 +98,20 @@ impl Call {
|
|||
}
|
||||
|
||||
pub fn get_named_arg<'a>(&self, stack: &'a Stack, flag_name: &str) -> Option<&'a Value> {
|
||||
self.named_iter(stack)
|
||||
.find_map(|(name, val)| (name.item == flag_name).then_some(val))
|
||||
// Optimized to avoid str::from_utf8()
|
||||
self.arguments(stack)
|
||||
.iter()
|
||||
.find_map(|arg: &Argument| -> Option<Option<&Value>> {
|
||||
match arg {
|
||||
Argument::Flag { data, name, .. } if &data[*name] == flag_name.as_bytes() => {
|
||||
Some(None)
|
||||
}
|
||||
Argument::Named {
|
||||
data, name, val, ..
|
||||
} if &data[*name] == flag_name.as_bytes() => Some(Some(val)),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
|
@ -192,13 +214,12 @@ impl CallBuilder {
|
|||
|
||||
/// Add a flag (no-value named) argument to the [`Stack`] and reference it from the [`Call`].
|
||||
pub fn add_flag(&mut self, stack: &mut Stack, name: impl AsRef<str>, span: Span) -> &mut Self {
|
||||
self.add_argument(
|
||||
stack,
|
||||
Argument::Flag {
|
||||
name: name.as_ref().into(),
|
||||
span,
|
||||
},
|
||||
)
|
||||
let data: Arc<[u8]> = name.as_ref().as_bytes().into();
|
||||
let name = DataSlice {
|
||||
start: 0,
|
||||
len: data.len().try_into().expect("flag name too big"),
|
||||
};
|
||||
self.add_argument(stack, Argument::Flag { data, name, span })
|
||||
}
|
||||
|
||||
/// Add a named argument to the [`Stack`] and reference it from the [`Call`].
|
||||
|
@ -209,10 +230,16 @@ impl CallBuilder {
|
|||
span: Span,
|
||||
val: Value,
|
||||
) -> &mut Self {
|
||||
let data: Arc<[u8]> = name.as_ref().as_bytes().into();
|
||||
let name = DataSlice {
|
||||
start: 0,
|
||||
len: data.len().try_into().expect("arg name too big"),
|
||||
};
|
||||
self.add_argument(
|
||||
stack,
|
||||
Argument::Named {
|
||||
name: name.as_ref().into(),
|
||||
data,
|
||||
name,
|
||||
span,
|
||||
val,
|
||||
},
|
||||
|
|
|
@ -2,7 +2,7 @@ use std::fmt;
|
|||
|
||||
use crate::{engine::EngineState, DeclId, VarId};
|
||||
|
||||
use super::{Instruction, IrBlock, RedirectMode};
|
||||
use super::{DataSlice, Instruction, IrBlock, Literal, RedirectMode};
|
||||
|
||||
pub struct FmtIrBlock<'a> {
|
||||
pub(super) engine_state: &'a EngineState,
|
||||
|
@ -14,11 +14,13 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
|
|||
let plural = |count| if count == 1 { "" } else { "s" };
|
||||
writeln!(
|
||||
f,
|
||||
"# {} register{}, {} instruction{}",
|
||||
"# {} register{}, {} instruction{}, {} byte{} of data",
|
||||
self.ir_block.register_count,
|
||||
plural(self.ir_block.register_count),
|
||||
self.ir_block.instructions.len(),
|
||||
plural(self.ir_block.instructions.len()),
|
||||
self.ir_block.data.len(),
|
||||
plural(self.ir_block.data.len()),
|
||||
)?;
|
||||
for (index, instruction) in self.ir_block.instructions.iter().enumerate() {
|
||||
writeln!(
|
||||
|
@ -27,7 +29,8 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
|
|||
index,
|
||||
FmtInstruction {
|
||||
engine_state: self.engine_state,
|
||||
instruction
|
||||
instruction,
|
||||
data: &self.ir_block.data,
|
||||
}
|
||||
)?;
|
||||
}
|
||||
|
@ -38,6 +41,7 @@ impl<'a> fmt::Display for FmtIrBlock<'a> {
|
|||
pub struct FmtInstruction<'a> {
|
||||
pub(super) engine_state: &'a EngineState,
|
||||
pub(super) instruction: &'a Instruction,
|
||||
pub(super) data: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for FmtInstruction<'a> {
|
||||
|
@ -46,7 +50,11 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
|
|||
|
||||
match self.instruction {
|
||||
Instruction::LoadLiteral { dst, lit } => {
|
||||
write!(f, "{:WIDTH$} {dst}, {lit:?}", "load-literal")
|
||||
let lit = FmtLiteral {
|
||||
literal: lit,
|
||||
data: self.data,
|
||||
};
|
||||
write!(f, "{:WIDTH$} {dst}, {lit}", "load-literal")
|
||||
}
|
||||
Instruction::Move { dst, src } => {
|
||||
write!(f, "{:WIDTH$} {dst}, {src}", "move")
|
||||
|
@ -69,13 +77,16 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
|
|||
write!(f, "{:WIDTH$} {var}, {src}", "store-variable")
|
||||
}
|
||||
Instruction::LoadEnv { dst, key } => {
|
||||
write!(f, "{:WIDTH$} {dst}, {key:?}", "load-env")
|
||||
let key = FmtData(self.data, *key);
|
||||
write!(f, "{:WIDTH$} {dst}, {key}", "load-env")
|
||||
}
|
||||
Instruction::LoadEnvOpt { dst, key } => {
|
||||
write!(f, "{:WIDTH$} {dst}, {key:?}", "load-env-opt")
|
||||
let key = FmtData(self.data, *key);
|
||||
write!(f, "{:WIDTH$} {dst}, {key}", "load-env-opt")
|
||||
}
|
||||
Instruction::StoreEnv { key, src } => {
|
||||
write!(f, "{:WIDTH$} {key:?}, {src}", "store-env")
|
||||
let key = FmtData(self.data, *key);
|
||||
write!(f, "{:WIDTH$} {key}, {src}", "store-env")
|
||||
}
|
||||
Instruction::PushPositional { src } => {
|
||||
write!(f, "{:WIDTH$} {src}", "push-positional")
|
||||
|
@ -84,10 +95,12 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
|
|||
write!(f, "{:WIDTH$} {src}", "append-rest")
|
||||
}
|
||||
Instruction::PushFlag { name } => {
|
||||
write!(f, "{:WIDTH$} {name:?}", "push-flag")
|
||||
let name = FmtData(self.data, *name);
|
||||
write!(f, "{:WIDTH$} {name}", "push-flag")
|
||||
}
|
||||
Instruction::PushNamed { name, src } => {
|
||||
write!(f, "{:WIDTH$} {name:?}, {src}", "push-named")
|
||||
let name = FmtData(self.data, *name);
|
||||
write!(f, "{:WIDTH$} {name}, {src}", "push-named")
|
||||
}
|
||||
Instruction::RedirectOut { mode } => {
|
||||
write!(f, "{:WIDTH$} {mode}", "redirect-out")
|
||||
|
@ -99,6 +112,13 @@ impl<'a> fmt::Display for FmtInstruction<'a> {
|
|||
let decl = FmtDecl::new(self.engine_state, *decl_id);
|
||||
write!(f, "{:WIDTH$} {decl}, {src_dst}", "call")
|
||||
}
|
||||
Instruction::ListPush { src_dst, item } => {
|
||||
write!(f, "{:WIDTH$} {src_dst}, {item}", "list-push")
|
||||
}
|
||||
Instruction::RecordInsert { src_dst, key, val } => {
|
||||
let key = FmtData(self.data, *key);
|
||||
write!(f, "{:WIDTH$} {src_dst}, {key}, {val}", "record-insert")
|
||||
}
|
||||
Instruction::BinaryOp { lhs_dst, op, rhs } => {
|
||||
write!(f, "{:WIDTH$} {lhs_dst}, {op:?}, {rhs}", "binary-op")
|
||||
}
|
||||
|
@ -170,7 +190,7 @@ impl fmt::Display for FmtVar<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RedirectMode {
|
||||
impl fmt::Display for RedirectMode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
RedirectMode::Pipe => write!(f, "pipe"),
|
||||
|
@ -181,3 +201,62 @@ impl std::fmt::Display for RedirectMode {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct FmtData<'a>(&'a [u8], DataSlice);
|
||||
|
||||
impl<'a> fmt::Display for FmtData<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if let Ok(s) = std::str::from_utf8(&self.0[self.1]) {
|
||||
// Write as string
|
||||
write!(f, "{s:?}")
|
||||
} else {
|
||||
// Write as byte array
|
||||
write!(f, "0x{:x?}", self.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct FmtLiteral<'a> {
|
||||
literal: &'a Literal,
|
||||
data: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for FmtLiteral<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.literal {
|
||||
Literal::Bool(b) => write!(f, "bool({b:?})"),
|
||||
Literal::Int(i) => write!(f, "int({i:?})"),
|
||||
Literal::Float(fl) => write!(f, "float({fl:?})"),
|
||||
Literal::Binary(b) => write!(f, "binary({})", FmtData(self.data, *b)),
|
||||
Literal::Block(id) => write!(f, "block({id})"),
|
||||
Literal::Closure(id) => write!(f, "closure({id})"),
|
||||
Literal::Range {
|
||||
start,
|
||||
step,
|
||||
end,
|
||||
inclusion,
|
||||
} => write!(f, "range({start}, {step}, {end}, {inclusion:?})"),
|
||||
Literal::List { capacity } => write!(f, "list(capacity = {capacity})"),
|
||||
Literal::Record { capacity } => write!(f, "record(capacity = {capacity})"),
|
||||
Literal::Filepath { val, no_expand } => write!(
|
||||
f,
|
||||
"filepath({}, no_expand = {no_expand:?})",
|
||||
FmtData(self.data, *val)
|
||||
),
|
||||
Literal::Directory { val, no_expand } => write!(
|
||||
f,
|
||||
"directory({}, no_expand = {no_expand:?})",
|
||||
FmtData(self.data, *val)
|
||||
),
|
||||
Literal::GlobPattern { val, no_expand } => write!(
|
||||
f,
|
||||
"glob-pattern({}, no_expand = {no_expand:?})",
|
||||
FmtData(self.data, *val)
|
||||
),
|
||||
Literal::String(s) => write!(f, "string({})", FmtData(self.data, *s)),
|
||||
Literal::RawString(rs) => write!(f, "raw-string({})", FmtData(self.data, *rs)),
|
||||
Literal::CellPath(p) => write!(f, "cell-path({p})"),
|
||||
Literal::Nothing => write!(f, "nothing"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::{
|
||||
ast::{CellPath, Operator},
|
||||
ast::{CellPath, Operator, RangeInclusion},
|
||||
engine::EngineState,
|
||||
BlockId, DeclId, RegId, Span, Spanned, VarId,
|
||||
BlockId, DeclId, RegId, Span, VarId,
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -18,6 +18,8 @@ pub use display::{FmtInstruction, FmtIrBlock};
|
|||
pub struct IrBlock {
|
||||
pub instructions: Vec<Instruction>,
|
||||
pub spans: Vec<Span>,
|
||||
#[serde(with = "serde_arc_u8_array")]
|
||||
pub data: Arc<[u8]>,
|
||||
pub register_count: usize,
|
||||
}
|
||||
|
||||
|
@ -32,6 +34,22 @@ impl IrBlock {
|
|||
}
|
||||
}
|
||||
|
||||
/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
|
||||
/// string data that a block uses.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct DataSlice {
|
||||
/// Byte offset into the data array at which the slice begins.
pub start: u32,
|
||||
/// Number of bytes covered by the slice, counted from `start`.
pub len: u32,
|
||||
}
|
||||
|
||||
impl std::ops::Index<DataSlice> for [u8] {
    type Output = [u8];

    /// Borrow the bytes from `index.start` up to (not including) `index.start + index.len`.
    ///
    /// Panics, like any slice range indexing, if that range is not within `self`.
    fn index(&self, index: DataSlice) -> &Self::Output {
        let first = index.start as usize;
        let past_end = first + index.len as usize;
        &self[first..past_end]
    }
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Instruction {
|
||||
/// Load a literal value into the `dst` register
|
||||
|
@ -49,27 +67,20 @@ pub enum Instruction {
|
|||
/// Store the value of a variable from the `src` register
|
||||
StoreVariable { var_id: VarId, src: RegId },
|
||||
/// Load the value of an environment variable into the `dst` register
|
||||
LoadEnv { dst: RegId, key: Box<str> },
|
||||
LoadEnv { dst: RegId, key: DataSlice },
|
||||
/// Load the value of an environment variable into the `dst` register, or `Nothing` if it
|
||||
/// doesn't exist
|
||||
LoadEnvOpt { dst: RegId, key: Box<str> },
|
||||
LoadEnvOpt { dst: RegId, key: DataSlice },
|
||||
/// Store the value of an environment variable from the `src` register
|
||||
StoreEnv { key: Box<str>, src: RegId },
|
||||
StoreEnv { key: DataSlice, src: RegId },
|
||||
/// Add a positional arg to the next call
|
||||
PushPositional { src: RegId },
|
||||
/// Add a list of args to the next call (spread/rest)
|
||||
AppendRest { src: RegId },
|
||||
/// Add a named arg with no value to the next call.
|
||||
PushFlag {
|
||||
#[serde(with = "serde_arc_str")]
|
||||
name: Arc<str>,
|
||||
},
|
||||
PushFlag { name: DataSlice },
|
||||
/// Add a named arg with a value to the next call.
|
||||
PushNamed {
|
||||
#[serde(with = "serde_arc_str")]
|
||||
name: Arc<str>,
|
||||
src: RegId,
|
||||
},
|
||||
PushNamed { name: DataSlice, src: RegId },
|
||||
/// Set the redirection for stdout for the next call (only)
|
||||
RedirectOut { mode: RedirectMode },
|
||||
/// Set the redirection for stderr for the next call (only)
|
||||
|
@ -77,6 +88,14 @@ pub enum Instruction {
|
|||
/// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
|
||||
/// overwriting it. The argument stack is used implicitly and cleared when the call ends.
|
||||
Call { decl_id: DeclId, src_dst: RegId },
|
||||
/// Push a value onto the end of a list. Used to construct list literals.
|
||||
ListPush { src_dst: RegId, item: RegId },
|
||||
/// Insert a key-value pair into a record. Any existing value for the key is overwritten.
|
||||
RecordInsert {
|
||||
src_dst: RegId,
|
||||
key: DataSlice,
|
||||
val: RegId,
|
||||
},
|
||||
/// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
|
||||
/// `lhs_dst`.
|
||||
BinaryOp {
|
||||
|
@ -96,7 +115,6 @@ pub enum Instruction {
|
|||
path: RegId,
|
||||
new_value: RegId,
|
||||
},
|
||||
/// Update a cell path
|
||||
/// Jump to an offset in this block
|
||||
Jump { index: usize },
|
||||
/// Branch to an offset in this block if the value of the `cond` register is a true boolean,
|
||||
|
@ -109,10 +127,15 @@ pub enum Instruction {
|
|||
impl Instruction {
|
||||
/// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
|
||||
/// listing of the instruction.
|
||||
pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtInstruction<'a> {
|
||||
pub fn display<'a>(
|
||||
&'a self,
|
||||
engine_state: &'a EngineState,
|
||||
data: &'a [u8],
|
||||
) -> FmtInstruction<'a> {
|
||||
FmtInstruction {
|
||||
engine_state,
|
||||
instruction: self,
|
||||
data,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -128,15 +151,35 @@ pub enum Literal {
|
|||
Bool(bool),
|
||||
Int(i64),
|
||||
Float(f64),
|
||||
Binary(Box<[u8]>),
|
||||
Binary(DataSlice),
|
||||
Block(BlockId),
|
||||
Closure(BlockId),
|
||||
List(Box<[Spanned<Literal>]>),
|
||||
Filepath { val: Box<str>, no_expand: bool },
|
||||
Directory { val: Box<str>, no_expand: bool },
|
||||
GlobPattern { val: Box<str>, no_expand: bool },
|
||||
String(Box<str>),
|
||||
RawString(Box<str>),
|
||||
Range {
|
||||
start: RegId,
|
||||
step: RegId,
|
||||
end: RegId,
|
||||
inclusion: RangeInclusion,
|
||||
},
|
||||
List {
|
||||
capacity: usize,
|
||||
},
|
||||
Record {
|
||||
capacity: usize,
|
||||
},
|
||||
Filepath {
|
||||
val: DataSlice,
|
||||
no_expand: bool,
|
||||
},
|
||||
Directory {
|
||||
val: DataSlice,
|
||||
no_expand: bool,
|
||||
},
|
||||
GlobPattern {
|
||||
val: DataSlice,
|
||||
no_expand: bool,
|
||||
},
|
||||
String(DataSlice),
|
||||
RawString(DataSlice),
|
||||
CellPath(Box<CellPath>),
|
||||
Nothing,
|
||||
}
|
||||
|
@ -163,23 +206,23 @@ pub enum RedirectMode {
|
|||
},
|
||||
}
|
||||
|
||||
/// Just a hack to allow `Arc<str>` to be serialized and deserialized
|
||||
mod serde_arc_str {
|
||||
/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized
|
||||
mod serde_arc_u8_array {
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
|
||||
pub fn serialize<S>(string: &Arc<str>, ser: S) -> Result<S::Ok, S::Error>
|
||||
pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
string.as_ref().serialize(ser)
|
||||
data.as_ref().serialize(ser)
|
||||
}
|
||||
|
||||
pub fn deserialize<'de, D>(de: D) -> Result<Arc<str>, D::Error>
|
||||
pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
let string: &'de str = Deserialize::deserialize(de)?;
|
||||
Ok(string.into())
|
||||
let data: &'de [u8] = Deserialize::deserialize(de)?;
|
||||
Ok(data.into())
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user