From 7f6d32dd8404cf182de3a8f6fe9878bdb804b627 Mon Sep 17 00:00:00 2001
From: Devyn Cairns
Date: Sat, 8 Jun 2024 02:13:59 -0700
Subject: [PATCH] wip: internal representation for evaluation

---
 crates/nu-protocol/src/id.rs     |   6 +
 crates/nu-protocol/src/ir/mod.rs | 210 +++++++++++++++++++++++++++++++
 crates/nu-protocol/src/lib.rs    |   1 +
 3 files changed, 217 insertions(+)
 create mode 100644 crates/nu-protocol/src/ir/mod.rs

diff --git a/crates/nu-protocol/src/id.rs b/crates/nu-protocol/src/id.rs
index 73c4f52e70..a6d41020c5 100644
--- a/crates/nu-protocol/src/id.rs
+++ b/crates/nu-protocol/src/id.rs
@@ -7,5 +7,11 @@ pub type ModuleId = usize;
 pub type OverlayId = usize;
 pub type FileId = usize;
 pub type VirtualPathId = usize;
+
+/// Identifier of a register used as an operand by IR instructions.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+#[repr(transparent)]
+pub struct RegId(pub u32);
+
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
 pub struct SpanId(pub usize); // more robust ID style used in the new parser
diff --git a/crates/nu-protocol/src/ir/mod.rs b/crates/nu-protocol/src/ir/mod.rs
new file mode 100644
index 0000000000..76ecc2d9c8
--- /dev/null
+++ b/crates/nu-protocol/src/ir/mod.rs
@@ -0,0 +1,210 @@
+//! Internal representation (IR) for evaluation: register-based instructions and the blocks
+//! that hold them.
+
+use std::fmt;
+
+use crate::{
+    ast::{CellPath, Operator},
+    BlockId, DeclId, Range, RegId, Span,
+};
+
+/// A sequence of IR instructions together with its spans and register count.
+#[derive(Debug, Clone)]
+pub struct IrBlock {
+    /// The instructions of this block, in order.
+    pub instructions: Vec<Instruction>,
+    /// Source spans for this block (presumably one per instruction; not enforced here).
+    pub spans: Vec<Span>,
+    /// Number of registers in this block; printed in the `Display` header.
+    pub register_count: usize,
+}
+
+/// Prints a `# N registers, M instructions` header, then one instruction per line.
+impl fmt::Display for IrBlock {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        writeln!(
+            f,
+            "# {} registers, {} instructions",
+            self.register_count,
+            self.instructions.len()
+        )?;
+        for instruction in &self.instructions {
+            writeln!(f, "{}", instruction)?;
+        }
+        Ok(())
+    }
+}
+
+/// A single IR instruction. Register operands are identified by [`RegId`].
+#[derive(Debug, Clone)]
+pub enum Instruction {
+    /// Load a literal value into the `dst` register
+    LoadLiteral { dst: RegId, lit: Literal },
+    /// Append a list in `src` to the list in `dst`, modifying `dst`. Used for list spread
+    AppendList { dst: RegId, src: RegId },
+    /// Append a string in `src` to the string in `dst`, modifying `dst`. Used for string
+    /// interpolation
+    AppendString { dst: RegId, src: RegId },
+    /// Copy a register (must be a collected value)
+    Clone { dst: RegId, src: RegId },
+    /// Collect a stream in a register to a value
+    Collect { src_dst: RegId },
+    /// Add a positional arg to the next call
+    PushPositional { src: RegId },
+    /// Add a list of args to the next call (spread/rest)
+    AppendRest { src: RegId },
+    /// Add a named arg to the next call. The `src` is optional, can be `None` if there is no value.
+    PushNamed { name: Box<str>, src: Option<RegId> },
+    /// Clear the argument stack for the next call
+    ClearArgs,
+    /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
+    /// overwriting it. The argument stack is used implicitly and cleared when the call ends.
+    Call { decl_id: DeclId, src_dst: RegId },
+    /// Do a binary operation on `lhs` (left) and `rhs` (right) and write the result to `dst`.
+    BinaryOp {
+        dst: RegId,
+        op: Operator,
+        lhs: RegId,
+        rhs: RegId,
+    },
+    /// Jump to an offset in this block
+    Jump { index: usize },
+    /// Branch to an offset in this block if the value of the `cond` register is a true boolean,
+    /// otherwise continue execution
+    BranchIf { cond: RegId, index: usize },
+    /// Return from the block with the value in the register
+    Return { src: RegId },
+}
+
+/// Prints the mnemonic padded to 15 columns, then the operands; registers are written
+/// as `%N`.
+impl fmt::Display for Instruction {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Instruction::LoadLiteral { dst, lit } => {
+                write!(f, "{:15} %{}, {:?}", "load-literal", dst.0, lit)
+            }
+            Instruction::AppendList { dst, src } => {
+                write!(f, "{:15} %{}, %{}", "append-list", dst.0, src.0)
+            }
+            Instruction::AppendString { dst, src } => {
+                write!(f, "{:15} %{}, %{}", "append-string", dst.0, src.0)
+            }
+            Instruction::Clone { dst, src } => {
+                write!(f, "{:15} %{}, %{}", "clone", dst.0, src.0)
+            }
+            Instruction::Collect { src_dst } => {
+                write!(f, "{:15} %{}", "collect", src_dst.0)
+            }
+            Instruction::PushPositional { src } => {
+                write!(f, "{:15} %{}", "push-positional", src.0)
+            }
+            Instruction::AppendRest { src } => {
+                write!(f, "{:15} %{}", "append-rest", src.0)
+            }
+            Instruction::PushNamed { name, src } => {
+                if let Some(src) = src {
+                    write!(f, "{:15} {:?}, %{}", "push-named", name, src.0)
+                } else {
+                    write!(f, "{:15} {:?}", "push-named", name)
+                }
+            }
+            Instruction::ClearArgs => {
+                write!(f, "{:15}", "clear-args")
+            }
+            Instruction::Call { decl_id, src_dst } => {
+                write!(f, "{:15} decl {}, %{}", "call", decl_id, src_dst.0)
+            }
+            Instruction::BinaryOp { dst, lhs, op, rhs } => {
+                write!(
+                    f,
+                    "{:15} %{}, {:?}, %{}, %{}",
+                    "binary-op", dst.0, op, lhs.0, rhs.0
+                )
+            }
+            Instruction::Jump { index } => {
+                write!(f, "{:15} {}", "jump", index)
+            }
+            Instruction::BranchIf { cond, index } => {
+                write!(f, "{:15} %{}, {}", "branch-if", cond.0, index)
+            }
+            Instruction::Return { src } => {
+                write!(f, "{:15} %{}", "return", src.0)
+            }
+        }
+    }
+}
+
+// This is to document/enforce the size of `Instruction` in bytes.
+// We should try to avoid increasing the size of `Instruction`,
+// and PRs that do so will have to change the number below so that it's noted in review.
+const _: () = assert!(std::mem::size_of::<Instruction>() <= 40);
+
+/// A literal value that can be loaded into a register with [`Instruction::LoadLiteral`].
+#[derive(Debug, Clone)]
+pub enum Literal {
+    Bool(bool),
+    Int(i64),
+    Float(f64),
+    Binary(Vec<u8>),
+    Range(Box<Range>),
+    Block(BlockId),
+    Closure(BlockId),
+    List(Vec<Literal>),
+    Filepath(String, bool),
+    Directory(String, bool),
+    GlobPattern(String, bool),
+    String(String),
+    RawString(String),
+    CellPath(CellPath),
+    Nothing,
+}
+
+/// Builds a small block by hand and checks the shape of its `Display` output.
+#[test]
+fn dummy_test() {
+    use crate::ast::Math;
+
+    let ir_block = IrBlock {
+        instructions: vec![
+            Instruction::LoadLiteral {
+                dst: RegId(0),
+                lit: Literal::String("foo".into()),
+            },
+            Instruction::PushPositional { src: RegId(0) },
+            Instruction::LoadLiteral {
+                dst: RegId(2),
+                lit: Literal::Int(40),
+            },
+            Instruction::LoadLiteral {
+                dst: RegId(3),
+                lit: Literal::Int(25),
+            },
+            Instruction::BinaryOp {
+                dst: RegId(1),
+                op: Operator::Math(Math::Plus),
+                lhs: RegId(2),
+                rhs: RegId(3),
+            },
+            Instruction::PushNamed {
+                name: "bar-level".into(),
+                src: Some(RegId(1)),
+            },
+            Instruction::LoadLiteral {
+                dst: RegId(4),
+                lit: Literal::Nothing,
+            },
+            Instruction::Call {
+                decl_id: 40,
+                src_dst: RegId(4),
+            },
+            Instruction::Return { src: RegId(4) },
+        ],
+        spans: vec![],
+        register_count: 5,
+    };
+    let rendered = ir_block.to_string();
+    println!("{}", rendered);
+    assert!(rendered.starts_with("# 5 registers, 9 instructions\n"));
+    assert_eq!(rendered.lines().count(), 1 + ir_block.instructions.len());
+}
diff --git a/crates/nu-protocol/src/lib.rs b/crates/nu-protocol/src/lib.rs
index d09186cf46..6721642027 100644
--- a/crates/nu-protocol/src/lib.rs
+++ b/crates/nu-protocol/src/lib.rs
@@ -9,6 +9,7 @@ pub mod eval_base;
 pub mod eval_const;
 mod example;
 mod id;
+pub mod ir;
 mod lev_distance;
 mod module;
 mod pipeline;