nushell/crates/nu-protocol/src/ir/mod.rs

354 lines
12 KiB
Rust

use std::{fmt, sync::Arc};
use crate::{
ast::{CellPath, Expression, Operator, Pattern, RangeInclusion},
engine::EngineState,
BlockId, DeclId, RegId, Span, Value, VarId,
};
use chrono::{DateTime, FixedOffset};
use serde::{Deserialize, Serialize};
mod call;
mod display;
pub use call::*;
pub use display::{FmtInstruction, FmtIrBlock};
/// A block of IR instructions together with the side tables that accompany them.
///
/// NOTE(review): `spans` and `ast` look like they are indexed in parallel with `instructions`
/// (one entry per instruction) — confirm against the code that builds blocks.
#[derive(Clone, Serialize, Deserialize)]
pub struct IrBlock {
/// The instructions that make up this block.
pub instructions: Vec<Instruction>,
/// Source spans associated with this block.
pub spans: Vec<Span>,
/// Raw byte storage that [`DataSlice`] values index into. Serialized through the private
/// `serde_arc_u8_array` helper module because of the `Arc<[u8]>` type.
#[serde(with = "serde_arc_u8_array")]
pub data: Arc<[u8]>,
/// Optional AST references (see [`IrAstRef`]); `None` where no reference was recorded.
pub ast: Vec<Option<IrAstRef>>,
/// NOTE(review): presumably the number of registers this block uses — confirm.
pub register_count: u32,
/// NOTE(review): presumably the number of redirection files this block uses — confirm.
pub file_count: u32,
}
impl fmt::Debug for IrBlock {
    /// Writes a debug representation of the block.
    ///
    /// Every field except `ast` is printed. The output is finished with
    /// `finish_non_exhaustive`, so the trailing `..` signals that a field was left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // the ast field is too verbose and doesn't add much
        f.debug_struct("IrBlock")
            .field("instructions", &self.instructions)
            .field("spans", &self.spans)
            .field("data", &self.data)
            .field("register_count", &self.register_count)
            // Must read `file_count`; printing `register_count` here would misreport the block.
            .field("file_count", &self.file_count)
            .finish_non_exhaustive()
    }
}
impl IrBlock {
    /// Pairs this block with `engine_state` in an [`FmtIrBlock`], which can be formatted with
    /// [`Display`](std::fmt::Display) to show a detailed listing of the block's instructions.
    ///
    /// Both references are borrowed for the lifetime of the returned value.
    pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtIrBlock<'a> {
        let ir_block = self;
        FmtIrBlock {
            ir_block,
            engine_state,
        }
    }
}
/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
/// string data that a block uses.
///
/// Indexing a `[u8]` with a `DataSlice` yields the bytes in `start..start + len`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct DataSlice {
/// Offset of the first byte covered by the slice.
pub start: u32,
/// Number of bytes covered by the slice.
pub len: u32,
}
impl DataSlice {
/// A data slice that contains no data. This slice is always valid.
pub const fn empty() -> DataSlice {
DataSlice { start: 0, len: 0 }
}
}
impl std::ops::Index<DataSlice> for [u8] {
    type Output = [u8];

    /// Returns the bytes of `self` covered by `index`, i.e. `start..start + len`.
    ///
    /// # Panics
    ///
    /// Panics, like any slice range indexing, if that range does not lie within `self`.
    fn index(&self, index: DataSlice) -> &Self::Output {
        let first = index.start as usize;
        let past_end = first + index.len as usize;
        &self[first..past_end]
    }
}
/// A possible reference into the abstract syntax tree for an instruction. This is not present for
/// most instructions and is just added when needed.
///
/// The expression is held behind an [`Arc`], so cloning an `IrAstRef` shares the same
/// [`Expression`] instead of copying it.
#[derive(Debug, Clone)]
pub struct IrAstRef(pub Arc<Expression>);
impl Serialize for IrAstRef {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
self.0.as_ref().serialize(serializer)
}
}
impl<'de> Deserialize<'de> for IrAstRef {
    /// Deserializes an [`Expression`] and wraps it in a freshly allocated [`Arc`].
    ///
    /// Any error from deserializing the expression is returned unchanged.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let expr = Expression::deserialize(deserializer)?;
        Ok(IrAstRef(Arc::new(expr)))
    }
}
/// A single IR instruction, as stored in [`IrBlock::instructions`].
///
/// Register operands follow a naming convention visible in the variants below: `src` operands
/// are read, `dst` operands are written, and `src_dst` operands are read and then overwritten
/// with the result. Each variant documents whether its source register is consumed or preserved.
/// [`DataSlice`] operands refer to bytes in a block's `data` array.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Instruction {
/// Unreachable code path (error)
Unreachable,
/// Load a literal value into the `dst` register
LoadLiteral { dst: RegId, lit: Literal },
/// Load a clone of a boxed value into the `dst` register (e.g. from const evaluation)
LoadValue { dst: RegId, val: Box<Value> },
/// Move a register. Value is taken from `src` (used by this instruction).
Move { dst: RegId, src: RegId },
/// Copy a register (must be a collected value). Value is still in `src` after this instruction.
Clone { dst: RegId, src: RegId },
/// Collect a stream in a register to a value
Collect { src_dst: RegId },
/// Drop the value/stream in a register, without draining
Drop { src: RegId },
/// Drain the value/stream in a register and discard (e.g. semicolon).
///
/// If passed a stream from an external command, sets $env.LAST_EXIT_CODE to the resulting exit
/// code, and invokes any available error handler with Empty, or if not available, returns an
/// exit-code-only stream, leaving the block.
Drain { src: RegId },
/// Load the value of a variable into the `dst` register
LoadVariable { dst: RegId, var_id: VarId },
/// Store the value of a variable from the `src` register
StoreVariable { var_id: VarId, src: RegId },
/// Load the value of an environment variable into the `dst` register
LoadEnv { dst: RegId, key: DataSlice },
/// Load the value of an environment variable into the `dst` register, or `Nothing` if it
/// doesn't exist
LoadEnvOpt { dst: RegId, key: DataSlice },
/// Store the value of an environment variable from the `src` register
StoreEnv { key: DataSlice, src: RegId },
/// Add a positional arg to the next (internal) call.
PushPositional { src: RegId },
/// Add a list of args to the next (internal) call (spread/rest).
AppendRest { src: RegId },
/// Add a named arg with no value to the next (internal) call.
PushFlag { name: DataSlice },
/// Add a named arg with a value to the next (internal) call.
PushNamed { name: DataSlice, src: RegId },
/// Add parser info to the next (internal) call.
PushParserInfo {
name: DataSlice,
info: Box<Expression>,
},
/// Set the redirection for stdout for the next call (only).
///
/// The register for a file redirection is not consumed.
RedirectOut { mode: RedirectMode },
/// Set the redirection for stderr for the next call (only).
///
/// The register for a file redirection is not consumed.
RedirectErr { mode: RedirectMode },
/// Throw an error if stderr wasn't redirected in the given stream. `src` is preserved.
CheckErrRedirected { src: RegId },
/// Open a file for redirection, pushing it onto the file stack.
OpenFile {
file_num: u32,
path: RegId,
append: bool,
},
/// Write data from the register to a file. This is done to finish a file redirection, in case
/// an internal command or expression was evaluated rather than an external one.
WriteFile { file_num: u32, src: RegId },
/// Pop a file used for redirection from the file stack.
CloseFile { file_num: u32 },
/// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
/// overwriting it. The argument stack is used implicitly and cleared when the call ends.
Call { decl_id: DeclId, src_dst: RegId },
/// Append a value onto the end of a string. Uses `to_expanded_string(", ", ...)` on the value.
/// Used for string interpolation literals. Not the same thing as the `++` operator.
StringAppend { src_dst: RegId, val: RegId },
/// Convert a string into a glob. Used for glob interpolation.
GlobFrom { src_dst: RegId, no_expand: bool },
/// Push a value onto the end of a list. Used to construct list literals.
ListPush { src_dst: RegId, item: RegId },
/// Spread a value onto the end of a list. Used to construct list literals.
ListSpread { src_dst: RegId, items: RegId },
/// Insert a key-value pair into a record. Used to construct record literals. Raises an error if
/// the key already existed in the record.
RecordInsert {
src_dst: RegId,
key: RegId,
val: RegId,
},
/// Spread a record onto a record. Used to construct record literals. Any existing value for the
/// key is overwritten.
RecordSpread { src_dst: RegId, items: RegId },
/// Negate a boolean.
Not { src_dst: RegId },
/// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
/// `lhs_dst`.
BinaryOp {
lhs_dst: RegId,
op: Operator,
rhs: RegId,
},
/// Follow a cell path on the value in `src_dst`, storing the result back to `src_dst`
FollowCellPath { src_dst: RegId, path: RegId },
/// Clone the value at a cell path in `src`, storing the result to `dst`. The original value
/// remains in `src`. Must be a collected value.
CloneCellPath { dst: RegId, src: RegId, path: RegId },
/// Update/insert a cell path to `new_value` on the value in `src_dst`, storing the modified
/// value back to `src_dst`
UpsertCellPath {
src_dst: RegId,
path: RegId,
new_value: RegId,
},
/// Jump to an offset in this block
Jump { index: usize },
/// Branch to an offset in this block if the value of the `cond` register is a true boolean,
/// otherwise continue execution
BranchIf { cond: RegId, index: usize },
/// Branch to an offset in this block if the value of the `src` register is Empty or Nothing,
/// otherwise continue execution. The original value in `src` is preserved.
BranchIfEmpty { src: RegId, index: usize },
/// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any
/// variables captured by the pattern. If the pattern doesn't match, continue execution. The
/// original value is preserved in `src` through this instruction.
Match {
pattern: Box<Pattern>,
src: RegId,
index: usize,
},
/// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to
/// `end_index` if the iterator is finished
Iterate {
dst: RegId,
stream: RegId,
end_index: usize,
},
/// Push an error handler, without capturing the error value
OnError { index: usize },
/// Push an error handler, capturing the error value into `dst`. If the error handler is not
/// called, the register should be freed manually.
OnErrorInto { index: usize, dst: RegId },
/// Pop an error handler. This is not necessary when control flow is directed to the error
/// handler due to an error.
PopErrorHandler,
/// Check if an external command failed. Boolean value into `dst`. `src` is preserved, but it
/// does require waiting for the command to exit.
CheckExternalFailed { dst: RegId, src: RegId },
/// Return from the block with the value in the register
Return { src: RegId },
}
impl Instruction {
    /// Bundles this instruction with `engine_state` and `data` in a [`FmtInstruction`], which
    /// can be formatted with [`Display`](std::fmt::Display) to show a detailed listing of the
    /// instruction.
    ///
    /// All three references are borrowed for the lifetime of the returned value.
    pub fn display<'a>(
        &'a self,
        engine_state: &'a EngineState,
        data: &'a [u8],
    ) -> FmtInstruction<'a> {
        let instruction = self;
        FmtInstruction {
            instruction,
            data,
            engine_state,
        }
    }
}
// Compile-time guard that documents/enforces the size of `Instruction` in bytes.
// Growing `Instruction` should be avoided; a PR that does grow it has to raise the limit
// below, which makes the change visible in review.
const _: () = {
    const MAX_INSTRUCTION_SIZE: usize = 24;
    assert!(std::mem::size_of::<Instruction>() <= MAX_INSTRUCTION_SIZE);
};
/// A literal value that can be embedded in an instruction.
///
/// Variants holding a [`DataSlice`] do not own their bytes; the slice refers to the `data`
/// array of a block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Literal {
Bool(bool),
Int(i64),
Float(f64),
Filesize(i64),
Duration(i64),
Binary(DataSlice),
Block(BlockId),
Closure(BlockId),
RowCondition(BlockId),
// `start`, `step` and `end` are register ids, not immediate values.
Range {
start: RegId,
step: RegId,
end: RegId,
inclusion: RangeInclusion,
},
// NOTE(review): `capacity` is presumably an allocation hint rather than a length — confirm.
List {
capacity: usize,
},
// NOTE(review): `capacity` is presumably an allocation hint rather than a length — confirm.
Record {
capacity: usize,
},
Filepath {
val: DataSlice,
no_expand: bool,
},
Directory {
val: DataSlice,
no_expand: bool,
},
GlobPattern {
val: DataSlice,
no_expand: bool,
},
String(DataSlice),
RawString(DataSlice),
// NOTE(review): the two payloads below are boxed, presumably to keep `Literal` (and therefore
// `Instruction`, which has a compile-time size assertion) small — confirm.
CellPath(Box<CellPath>),
Date(Box<DateTime<FixedOffset>>),
Nothing,
}
/// A redirection mode for the next call. See [`OutDest`](crate::OutDest).
///
/// This is generated by:
///
/// 1. Explicit redirection in a [`PipelineElement`](crate::ast::PipelineElement), or
/// 2. The [`pipe_redirection()`](crate::engine::Command::pipe_redirection) of the command being
///    piped into.
///
/// Not setting it uses the default, determined by [`Stack`](crate::engine::Stack).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RedirectMode {
Pipe,
Capture,
Null,
Inherit,
/// Use the given numbered file.
///
/// NOTE(review): `file_num` presumably uses the same numbering as the `file_num` operands of
/// `Instruction::OpenFile` / `WriteFile` / `CloseFile` — confirm.
File {
file_num: u32,
},
/// Use the redirection mode requested by the caller, for a pre-return call.
Caller,
}
/// Serde helper for `Arc<[u8]>` fields, for use with `#[serde(with = "...")]`.
/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized.
mod serde_arc_u8_array {
    use serde::{Deserialize, Serialize};
    use std::sync::Arc;

    /// Serializes the shared buffer by delegating to the `Serialize` impl of `[u8]`.
    pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let bytes: &[u8] = data;
        bytes.serialize(ser)
    }

    /// Deserializes into an owned `Vec<u8>` first, then converts that into an `Arc<[u8]>`.
    pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let bytes = Vec::<u8>::deserialize(de)?;
        Ok(Arc::from(bytes))
    }
}