From dacf80f34a2b8301377ce6d05f134a141bf2d9cb Mon Sep 17 00:00:00 2001 From: Doru Date: Wed, 17 May 2023 20:35:22 -0300 Subject: [PATCH] Feature: Userland LazyRecords (#8332) # Description Despite the innocent-looking title, this PR involves quite a few backend changes as the existing LazyRecord trait was not at all friendly towards the idea of these values being generated on the fly from Nu code. In particular, here are a few changes involved: - The LazyRecord trait now involves a lifetime `'a`, and this lifetime is used in the return value of `get_column_names`. This means it no longer returns `'static str`s (but implementations still can return these). This is more stringent on the consumption side. - The LazyRecord trait now must be able to clone itself via a new `clone_value` method (as requiring `Clone` is not object safe). This pattern is borrowed from `Value::CustomValue`. - LazyRecord no longer requires being serde serializable and deserializable. These, in hand, allow for the following: - LazyRecord can now clone itself, which means that they don't have to be collected into a Record when being cloned. - This is especially useful in Stack, which is cloned on each repl line and in a few other cases. This would mean that _every_ LazyRecord instance stored in a variable would be collected in its entirety and cloned, which can be catastrophic for performance. See: `let nulol = $nu`. - LazyRecord's columns don't have to be static, they can have the same lifetime of the struct itself, so different instances of the same LazyRecord type can have different columns and values (like the new `NuLazyRecord`) - Serialization and deserialization are no longer meaningless, they are simply less. I would consider this PR very "drafty", but everything works. It probably requires some cleanup and testing, though, but I'd like some eyes and pointers first. # User-Facing Changes New command. New restrictions are largely internal. Maybe there are some plugins affected? Example of new command's usage: ``` lazy make --columns [a b c] --get-value { |name| print $"getting ($name)"; $name | str upcase } ``` You can also trivially implement something like `lazy make record` to take a record of closures and turn it into a getter-like lazy struct: ``` def "lazy make record" [ record: record ] { let columns = ($record | columns) lazy make --columns $columns --get-value { |col| do ($record | get $col) } } ``` Open to bikeshedding. `lazy make` is similar to `error make` which is also in the core commands. I didn't like `make lazy` since it sounded like some transformation was going on. # Tour for reviewers Take a look at LazyMake's examples. They have `None` as the results, as such they aren't _really_ correct and aren't being tested at all. I didn't do this because creating the Value::LazyRecord is a little tricky and didn't want to risk messing it up, especially as the necessary variables aren't available when creating the examples (like stack and engine state). Also take a look at NuLazyRecord's get_value implementation, or in general. It uses an Arc> for the stack, which must be accessed mutably for eval_block but get_value only provides us with a `&self`. This is a sad state of affairs, but I don't know if there's a better way. On the same code path, we also have pipeline handling, and any pipeline that isn't a Pipeline::Value will return Value::nothing. I believe returning a Value::Error is probably better, or maybe some other handling. Couldn't decide on which ShellError to settle with for that branch. The "unfortunate casualty" in the columns.rs file. I'm not sure just how bad that is, though, I simply had to fight a little with the borrow checker. A few leftover comments like derives, comments about the now non-existing serde requirements, and impls. I'll definitely get around to those eventually but they're in atm Should NuLazyRecord implement caching? I'm leaning heavily towards **yes**, this was one of the main reasons not to use a record of closures (besides convenience), but maybe it could be opt-out. I'd wonder about its implementation too, but a simple way would be to move a HashMap into the mutex state and keep cached values there. --- .../src/core_commands/lazy_make.rs | 158 ++++++++++++++++++ crates/nu-cmd-lang/src/core_commands/mod.rs | 2 + crates/nu-cmd-lang/src/default_context.rs | 1 + crates/nu-command/src/filters/columns.rs | 18 +- crates/nu-command/src/system/sys.rs | 16 +- crates/nu-engine/src/nu_variable.rs | 18 +- crates/nu-protocol/src/value/lazy_record.rs | 9 +- crates/nu-protocol/src/value/mod.rs | 14 +- 8 files changed, 192 insertions(+), 44 deletions(-) create mode 100644 crates/nu-cmd-lang/src/core_commands/lazy_make.rs diff --git a/crates/nu-cmd-lang/src/core_commands/lazy_make.rs b/crates/nu-cmd-lang/src/core_commands/lazy_make.rs new file mode 100644 index 0000000000..3762292d3f --- /dev/null +++ b/crates/nu-cmd-lang/src/core_commands/lazy_make.rs @@ -0,0 +1,158 @@ +use std::sync::{Arc, Mutex}; + +use nu_engine::{eval_block, CallExt}; +use nu_protocol::ast::Call; +use nu_protocol::engine::{Closure, Command, EngineState, Stack}; +use nu_protocol::{ + Category, Example, IntoPipelineData, LazyRecord, PipelineData, ShellError, Signature, Span, + SyntaxShape, Type, Value, +}; + +#[derive(Clone)] +pub struct LazyMake; + +impl Command for LazyMake { + fn name(&self) -> &str { + "lazy make" + } + + fn signature(&self) -> Signature { + Signature::build("lazy make") + .input_output_types(vec![(Type::Nothing, Type::Record(vec![]))]) + .required_named( + "columns", + SyntaxShape::List(Box::new(SyntaxShape::String)), + "Closure that gets called when the LazyRecord needs to list the available column names", + Some('c') + ) + .required_named( + "get-value", + SyntaxShape::Closure(Some(vec![SyntaxShape::String])), + "Closure to call when a value needs to be produced on demand", + Some('g') + ) + .category(Category::Core) + } + + fn usage(&self) -> &str { + "Create a lazy record." + } + + fn extra_usage(&self) -> &str { + "Lazy records are special records that only evaluate their values once the property is requested. + For example, when printing a lazy record, all of its fields will be collected. But when accessing + a specific property, only it will be evaluated. + + Note that this is unrelated to the lazyframes feature bundled with dataframes." + } + + fn search_terms(&self) -> Vec<&str> { + vec!["deferred", "record", "procedural"] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + let span = call.head; + let columns: Vec = call + .get_flag(engine_state, stack, "columns")? + .expect("required flag"); + let get_value: Closure = call + .get_flag(engine_state, stack, "get-value")? + .expect("required flag"); + + Ok(Value::LazyRecord { + val: Box::new(NuLazyRecord { + engine_state: engine_state.clone(), + stack: Arc::new(Mutex::new(stack.clone())), + columns, + get_value, + span, + }), + span, + } + .into_pipeline_data()) + } + + fn examples(&self) -> Vec { + vec![ + // TODO: Figure out how to "test" these examples, or leave results as None + Example { + description: "Create a lazy record", + example: r#"lazy make --columns ["haskell", "futures", "nushell"] --get-value { |lazything| $lazything + "!" }"#, + result: None, + }, + Example { + description: "Test the laziness of lazy records", + example: r#"lazy make -c ["hello"] -g { |key| print $"getting ($key)!"; $key | str upcase }"#, + result: None, + }, + ] + } +} + +#[derive(Clone)] +struct NuLazyRecord { + engine_state: EngineState, + stack: Arc>, + columns: Vec, + get_value: Closure, + span: Span, +} + +impl std::fmt::Debug for NuLazyRecord { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("NuLazyRecord").finish() + } +} + +impl<'a> LazyRecord<'a> for NuLazyRecord { + fn column_names(&'a self) -> Vec<&'a str> { + self.columns.iter().map(|column| column.as_str()).collect() + } + + fn get_column_value(&self, column: &str) -> Result { + let block = self.engine_state.get_block(self.get_value.block_id); + let mut stack = self.stack.lock().expect("lock must not be poisoned"); + let column_value = Value::String { + val: column.into(), + span: self.span, + }; + + if let Some(var) = block.signature.get_positional(0) { + if let Some(var_id) = &var.var_id { + stack.add_var(*var_id, column_value.clone()); + } + } + + let pipeline_result = eval_block( + &self.engine_state, + &mut stack, + block, + PipelineData::Value(column_value, None), + false, + false, + ); + + pipeline_result.map(|data| match data { + PipelineData::Value(value, ..) => value, + // TODO: Proper error handling. + _ => Value::Nothing { span: self.span }, + }) + } + + fn span(&self) -> Span { + self.span + } + + fn clone_value(&self, span: Span) -> Value { + Value::LazyRecord { + val: Box::new((*self).clone()), + span, + } + } +} diff --git a/crates/nu-cmd-lang/src/core_commands/mod.rs b/crates/nu-cmd-lang/src/core_commands/mod.rs index efde1a4651..32ea378968 100644 --- a/crates/nu-cmd-lang/src/core_commands/mod.rs +++ b/crates/nu-cmd-lang/src/core_commands/mod.rs @@ -28,6 +28,7 @@ mod hide; mod hide_env; mod if_; mod ignore; +mod lazy_make; mod let_; mod loop_; mod match_; @@ -70,6 +71,7 @@ pub use hide::Hide; pub use hide_env::HideEnv; pub use if_::If; pub use ignore::Ignore; +pub use lazy_make::LazyMake; pub use let_::Let; pub use loop_::Loop; pub use match_::Match; diff --git a/crates/nu-cmd-lang/src/default_context.rs b/crates/nu-cmd-lang/src/default_context.rs index 31ebbc0b6f..c054cba1d6 100644 --- a/crates/nu-cmd-lang/src/default_context.rs +++ b/crates/nu-cmd-lang/src/default_context.rs @@ -51,6 +51,7 @@ pub fn create_default_context() -> EngineState { OverlayList, OverlayNew, OverlayHide, + LazyMake, Let, Loop, Match, diff --git a/crates/nu-command/src/filters/columns.rs b/crates/nu-command/src/filters/columns.rs index f8fca080b8..f346ffd33c 100644 --- a/crates/nu-command/src/filters/columns.rs +++ b/crates/nu-command/src/filters/columns.rs @@ -122,15 +122,15 @@ fn getcol( .into_pipeline_data(ctrlc) .set_metadata(metadata)) } - PipelineData::Value(Value::LazyRecord { val, .. }, ..) => Ok(val - .column_names() - .into_iter() - .map(move |x| Value::String { - val: x.into(), - span: head, - }) - .into_pipeline_data(ctrlc) - .set_metadata(metadata)), + PipelineData::Value(Value::LazyRecord { val, .. }, ..) => Ok({ + // Unfortunate casualty to LazyRecord's column_names not generating 'static strs + let cols: Vec<_> = val.column_names().iter().map(|s| s.to_string()).collect(); + + cols.into_iter() + .map(move |x| Value::String { val: x, span: head }) + .into_pipeline_data(ctrlc) + .set_metadata(metadata) + }), PipelineData::Value(Value::Record { cols, .. }, ..) => Ok(cols .into_iter() .map(move |x| Value::String { val: x, span: head }) diff --git a/crates/nu-command/src/system/sys.rs b/crates/nu-command/src/system/sys.rs index 1966bfd0c5..d7c6bc3232 100644 --- a/crates/nu-command/src/system/sys.rs +++ b/crates/nu-command/src/system/sys.rs @@ -6,7 +6,6 @@ use nu_protocol::{ Category, Example, IntoPipelineData, LazyRecord, PipelineData, ShellError, Signature, Span, Type, Value, }; -use serde::{Deserialize, Serialize}; use std::time::{Duration, UNIX_EPOCH}; use sysinfo::{ ComponentExt, CpuExt, CpuRefreshKind, DiskExt, NetworkExt, System, SystemExt, UserExt, @@ -68,12 +67,12 @@ impl Command for Sys { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone)] pub struct SysResult { pub span: Span, } -impl LazyRecord for SysResult { +impl LazyRecord<'_> for SysResult { fn column_names(&self) -> Vec<&'static str> { vec!["host", "cpu", "disks", "mem", "temp", "net"] } @@ -100,12 +99,11 @@ impl LazyRecord for SysResult { self.span } - fn typetag_name(&self) -> &'static str { - "sys" - } - - fn typetag_deserialize(&self) { - unimplemented!("typetag_deserialize") + fn clone_value(&self, span: Span) -> Value { + Value::LazyRecord { + val: Box::new((*self).clone()), + span, + } } } diff --git a/crates/nu-engine/src/nu_variable.rs b/crates/nu-engine/src/nu_variable.rs index 0441bc4d51..72c1f92a44 100644 --- a/crates/nu-engine/src/nu_variable.rs +++ b/crates/nu-engine/src/nu_variable.rs @@ -4,7 +4,6 @@ use nu_protocol::{ engine::{EngineState, Stack}, LazyRecord, ShellError, Span, Value, }; -use serde::{Deserialize, Serialize}; use std::path::PathBuf; use sysinfo::SystemExt; @@ -14,16 +13,14 @@ use sysinfo::SystemExt; // Note: NuVariable is not meaningfully serializable, this #[derive] is a lie to satisfy the type checker. // Make sure to collect() the record before serializing it -#[derive(Serialize, Deserialize)] +#[derive(Clone)] pub struct NuVariable { - #[serde(skip)] pub engine_state: EngineState, - #[serde(skip)] pub stack: Stack, pub span: Span, } -impl LazyRecord for NuVariable { +impl<'a> LazyRecord<'a> for NuVariable { fn column_names(&self) -> Vec<&'static str> { let mut cols = vec![ "default-config-dir", @@ -253,12 +250,11 @@ impl LazyRecord for NuVariable { self.span } - fn typetag_name(&self) -> &'static str { - "nu_variable" - } - - fn typetag_deserialize(&self) { - unimplemented!("typetag_deserialize") + fn clone_value(&self, span: Span) -> Value { + Value::LazyRecord { + val: Box::new((*self).clone()), + span, + } } } diff --git a/crates/nu-protocol/src/value/lazy_record.rs b/crates/nu-protocol/src/value/lazy_record.rs index f2753dede4..a956b82e79 100644 --- a/crates/nu-protocol/src/value/lazy_record.rs +++ b/crates/nu-protocol/src/value/lazy_record.rs @@ -4,10 +4,9 @@ use std::fmt; // Trait definition for a lazy record (where columns are evaluated on-demand) // typetag is needed to make this implement Serialize+Deserialize... even though we should never actually serialize a LazyRecord. // To serialize a LazyRecord, collect it into a Value::Record with collect() first. -#[typetag::serde(tag = "type")] -pub trait LazyRecord: fmt::Debug + Send + Sync { +pub trait LazyRecord<'a>: fmt::Debug + Send + Sync { // All column names - fn column_names(&self) -> Vec<&'static str>; + fn column_names(&'a self) -> Vec<&'a str>; // Get 1 specific column value fn get_column_value(&self, column: &str) -> Result; @@ -15,7 +14,7 @@ pub trait LazyRecord: fmt::Debug + Send + Sync { fn span(&self) -> Span; // Convert the lazy record into a regular Value::Record by collecting all its columns - fn collect(&self) -> Result { + fn collect(&'a self) -> Result { let mut cols = vec![]; let mut vals = vec![]; @@ -31,4 +30,6 @@ pub trait LazyRecord: fmt::Debug + Send + Sync { span: self.span(), }) } + + fn clone_value(&self, span: Span) -> Value; } diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index 096577c29c..24de846e3d 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -108,9 +108,9 @@ pub enum Value { val: Box, span: Span, }, - #[serde(skip_serializing)] + #[serde(skip)] LazyRecord { - val: Box, + val: Box LazyRecord<'a>>, span: Span, }, MatchPattern { @@ -150,15 +150,7 @@ impl Clone for Value { vals: vals.clone(), span: *span, }, - Value::LazyRecord { val, .. } => { - match val.collect() { - Ok(val) => val, - // this is a bit weird, but because clone() is infallible... - Err(error) => Value::Error { - error: Box::new(error), - }, - } - } + Value::LazyRecord { val, span } => val.clone_value(*span), Value::List { vals, span } => Value::List { vals: vals.clone(), span: *span,