diff --git a/crates/nu-command/src/dataframe/eager/filter_with.rs b/crates/nu-command/src/dataframe/eager/filter_with.rs index a93bca818d..910d099470 100644 --- a/crates/nu-command/src/dataframe/eager/filter_with.rs +++ b/crates/nu-command/src/dataframe/eager/filter_with.rs @@ -29,23 +29,37 @@ impl Command for FilterWith { SyntaxShape::Any, "boolean mask used to filter data", ) - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or lazyframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Filter dataframe using a bool mask", - example: r#"let mask = ([true false] | dfr to-df); + vec![ + Example { + description: "Filter dataframe using a bool mask", + example: r#"let mask = ([true false] | dfr to-df); [[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with $mask"#, - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(1)]), + Column::new("b".to_string(), vec![Value::test_int(2)]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Filter dataframe using an expression", + example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr filter-with ((dfr col a) > 1)", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(3)]), + Column::new("b".to_string(), vec![Value::test_int(4)]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] } fn run( @@ -81,31 +95,42 @@ fn command_eager( df: NuDataFrame, ) -> Result { let mask_value: Value = call.req(engine_state, stack, 0)?; - let mask_span = mask_value.span()?; - let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?; - let mask = mask.bool().map_err(|e| { - ShellError::GenericError( - "Error casting to bool".into(), - e.to_string(), - Some(mask_span), - Some("Perhaps you want to use a series with booleans as mask".into()), - Vec::new(), - ) - })?; - df.as_ref() - .filter(mask) - .map_err(|e| { + if NuExpression::can_downcast(&mask_value) { + let expression = NuExpression::try_from_value(mask_value)?; + let lazy = NuLazyFrame::new(true, df.lazy()); + let lazy = lazy.apply_with_expr(expression, LazyFrame::filter); + + Ok(PipelineData::Value( + NuLazyFrame::into_value(lazy, call.head)?, + None, + )) + } else { + let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?; + let mask = mask.bool().map_err(|e| { ShellError::GenericError( - "Error calculating dummies".into(), + "Error casting to bool".into(), e.to_string(), - Some(call.head), - Some("The only allowed column types for dummies are String or Int".into()), + Some(mask_span), + Some("Perhaps you want to use a series with booleans as mask".into()), Vec::new(), ) - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) + })?; + + df.as_ref() + .filter(mask) + .map_err(|e| { + ShellError::GenericError( + "Error filtering dataframe".into(), + e.to_string(), + Some(call.head), + Some("The only allowed column types for dummies are String or Int".into()), + Vec::new(), + ) + }) + .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) + } } fn command_lazy( @@ -120,7 +145,7 @@ fn command_lazy( let lazy = lazy.apply_with_expr(expr, LazyFrame::filter); Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head), + NuLazyFrame::into_value(lazy, call.head)?, None, )) } @@ -129,9 +154,10 @@ fn command_lazy( mod test { use super::super::super::test_dataframe::test_dataframe; use super::*; + use crate::dataframe::expressions::ExprCol; #[test] fn test_examples() { - test_dataframe(vec![Box::new(FilterWith {})]) + test_dataframe(vec![Box::new(FilterWith {}), Box::new(ExprCol {})]) } } diff --git a/crates/nu-command/src/dataframe/eager/first.rs b/crates/nu-command/src/dataframe/eager/first.rs index 7053b954e0..e09128ce21 100644 --- a/crates/nu-command/src/dataframe/eager/first.rs +++ b/crates/nu-command/src/dataframe/eager/first.rs @@ -22,7 +22,7 @@ impl Command for FirstDF { fn signature(&self) -> Signature { Signature::build(self.name()) .optional("rows", SyntaxShape::Int, "Number of rows for head") - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or expression".into())) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/dataframe/eager/last.rs b/crates/nu-command/src/dataframe/eager/last.rs index a9cc352d7d..1635cb22f3 100644 --- a/crates/nu-command/src/dataframe/eager/last.rs +++ b/crates/nu-command/src/dataframe/eager/last.rs @@ -22,7 +22,7 @@ impl Command for LastDF { fn signature(&self) -> Signature { Signature::build(self.name()) .optional("rows", SyntaxShape::Int, "Number of rows for tail") - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or lazyframe".into())) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/dataframe/eager/mod.rs b/crates/nu-command/src/dataframe/eager/mod.rs index 4a54e98c41..76bb832fd0 100644 --- a/crates/nu-command/src/dataframe/eager/mod.rs +++ b/crates/nu-command/src/dataframe/eager/mod.rs @@ -18,7 +18,6 @@ mod rename; mod sample; mod shape; mod slice; -mod sort; mod take; mod to_csv; mod to_df; @@ -48,7 +47,6 @@ pub use rename::RenameDF; pub use sample::SampleDF; pub use shape::ShapeDF; pub use slice::SliceDF; -pub use sort::SortDF; pub use take::TakeDF; pub use to_csv::ToCSV; pub use to_df::ToDataFrame; @@ -88,7 +86,6 @@ pub fn add_eager_decls(working_set: &mut StateWorkingSet) { SampleDF, ShapeDF, SliceDF, - SortDF, TakeDF, ToCSV, ToDataFrame, diff --git a/crates/nu-command/src/dataframe/eager/rename.rs b/crates/nu-command/src/dataframe/eager/rename.rs index 117dba47e8..5b3c18f8f6 100644 --- a/crates/nu-command/src/dataframe/eager/rename.rs +++ b/crates/nu-command/src/dataframe/eager/rename.rs @@ -14,7 +14,7 @@ pub struct RenameDF; impl Command for RenameDF { fn name(&self) -> &str { - "dfr rename-col" + "dfr rename" } fn usage(&self) -> &str { @@ -33,28 +33,65 @@ impl Command for RenameDF { SyntaxShape::Any, "New names for the selected column(s). A string or list of strings", ) - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or lazyframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Renames a dataframe column", - example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename-col a a_new", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a_new".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Renames a series", + example: "[5 6 7 8] | dfr to-df | dfr rename '0' new_name", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "new_name".to_string(), + vec![ + Value::test_int(5), + Value::test_int(6), + Value::test_int(7), + Value::test_int(8), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Renames a dataframe column", + example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename a a_new", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a_new".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Renames two dataframe columns", + example: "[[a b]; [1 2] [3 4]] | dfr to-df | dfr rename [a b] [a_new b_new]", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a_new".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b_new".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] } fn run( @@ -133,7 +170,7 @@ fn command_lazy( let lazy = lazy.into_polars(); let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into(); - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/eager/sort.rs b/crates/nu-command/src/dataframe/eager/sort.rs deleted file mode 100644 index 9ee9b3adf3..0000000000 --- a/crates/nu-command/src/dataframe/eager/sort.rs +++ /dev/null @@ -1,148 +0,0 @@ -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, -}; - -use crate::dataframe::values::{utils::convert_columns_string, Column}; - -use super::super::values::NuDataFrame; - -#[derive(Clone)] -pub struct SortDF; - -impl Command for SortDF { - fn name(&self) -> &str { - "dfr sort" - } - - fn usage(&self) -> &str { - "Creates new sorted dataframe or series" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .switch("reverse", "invert sort", Some('r')) - .rest("rest", SyntaxShape::Any, "column names to sort dataframe") - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create new sorted dataframe", - example: "[[a b]; [3 4] [1 2]] | dfr to-df | dfr sort a", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Create new sorted series", - example: "[3 4 1 2] | dfr to-df | dfr sort", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - Value::test_int(4), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let reverse = call.has_flag("reverse"); - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - if df.is_series() { - let columns = df.as_ref().get_column_names(); - - df.as_ref() - .sort(columns, reverse) - .map_err(|e| { - ShellError::GenericError( - "Error sorting dataframe".into(), - e.to_string(), - Some(call.head), - None, - Vec::new(), - ) - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) - } else { - let columns: Vec = call.rest(engine_state, stack, 0)?; - - if !columns.is_empty() { - let (col_string, col_span) = convert_columns_string(columns, call.head)?; - - df.as_ref() - .sort(&col_string, reverse) - .map_err(|e| { - ShellError::GenericError( - "Error sorting dataframe".into(), - e.to_string(), - Some(col_span), - None, - Vec::new(), - ) - }) - .map(|df| { - PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None) - }) - } else { - Err(ShellError::GenericError( - "Missing columns".into(), - "missing column name to perform sort".into(), - Some(call.head), - None, - Vec::new(), - )) - } - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(SortDF {})]) - } -} diff --git a/crates/nu-command/src/dataframe/eager/with_column.rs b/crates/nu-command/src/dataframe/eager/with_column.rs index a506ac8dbd..8f5314f0db 100644 --- a/crates/nu-command/src/dataframe/eager/with_column.rs +++ b/crates/nu-command/src/dataframe/eager/with_column.rs @@ -27,7 +27,7 @@ impl Command for WithColumn { SyntaxShape::Any, "series to be added or expressions used to define the new columns", ) - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or lazyframe".into())) } fn examples(&self) -> Vec { @@ -59,11 +59,34 @@ impl Command for WithColumn { Example { description: "Adds a series to the dataframe", example: r#"[[a b]; [1 2] [3 4]] - | dfr to-df | dfr to-lazy - | dfr with-column ((dfr col a) * 2 | dfr as "c") + | dfr with-column [ + ((dfr col a) * 2 | dfr as "c") + ((dfr col a) * 3 | dfr as "d") + ] | dfr collect"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(2), Value::test_int(6)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(3), Value::test_int(9)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, ] } @@ -85,7 +108,7 @@ impl Command for WithColumn { command_eager(engine_state, stack, call, df) } else { Err(ShellError::CantConvert( - "expression or query".into(), + "lazy or eager dataframe".into(), value.get_type().to_string(), value.span()?, None, @@ -100,34 +123,49 @@ fn command_eager( call: &Call, mut df: NuDataFrame, ) -> Result { - let other_value: Value = call.req(engine_state, stack, 0)?; - let other_span = other_value.span()?; - let mut other = NuDataFrame::try_from_value(other_value)?.as_series(other_span)?; + let new_column: Value = call.req(engine_state, stack, 0)?; + let column_span = new_column.span()?; - let name = match call.get_flag::(engine_state, stack, "name")? { - Some(name) => name, - None => other.name().to_string(), - }; + if NuExpression::can_downcast(&new_column) { + let vals: Vec = call.rest(engine_state, stack, 0)?; + let value = Value::List { + vals, + span: call.head, + }; + let expressions = NuExpression::extract_exprs(value)?; + let lazy = NuLazyFrame::new(true, df.lazy().with_columns(&expressions)); - let series = other.rename(&name).clone(); + let df = lazy.collect(call.head)?; - df.as_mut() - .with_column(series) - .map_err(|e| { - ShellError::GenericError( - "Error adding column to dataframe".into(), - e.to_string(), - Some(other_span), - None, - Vec::new(), - ) - }) - .map(|df| { - PipelineData::Value( - NuDataFrame::dataframe_into_value(df.clone(), call.head), - None, - ) - }) + Ok(PipelineData::Value(df.into_value(call.head), None)) + } else { + let mut other = NuDataFrame::try_from_value(new_column)?.as_series(column_span)?; + + let name = match call.get_flag::(engine_state, stack, "name")? { + Some(name) => name, + None => other.name().to_string(), + }; + + let series = other.rename(&name).clone(); + + df.as_mut() + .with_column(series) + .map_err(|e| { + ShellError::GenericError( + "Error adding column to dataframe".into(), + e.to_string(), + Some(column_span), + None, + Vec::new(), + ) + }) + .map(|df| { + PipelineData::Value( + NuDataFrame::dataframe_into_value(df.clone(), call.head), + None, + ) + }) + } } fn command_lazy( @@ -146,7 +184,7 @@ fn command_lazy( let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into(); Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head), + NuLazyFrame::into_value(lazy, call.head)?, None, )) } @@ -155,9 +193,15 @@ fn command_lazy( mod test { use super::super::super::test_dataframe::test_dataframe; use super::*; + use crate::dataframe::expressions::ExprAlias; + use crate::dataframe::expressions::ExprCol; #[test] fn test_examples() { - test_dataframe(vec![Box::new(WithColumn {})]) + test_dataframe(vec![ + Box::new(WithColumn {}), + Box::new(ExprAlias {}), + Box::new(ExprCol {}), + ]) } } diff --git a/crates/nu-command/src/dataframe/expressions/alias.rs b/crates/nu-command/src/dataframe/expressions/alias.rs index d5dd02def2..0fe590b20a 100644 --- a/crates/nu-command/src/dataframe/expressions/alias.rs +++ b/crates/nu-command/src/dataframe/expressions/alias.rs @@ -4,7 +4,7 @@ use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; #[derive(Clone)] @@ -26,14 +26,34 @@ impl Command for ExprAlias { SyntaxShape::String, "Alias name for the expression", ) - .category(Category::Custom("expressions".into())) + .category(Category::Custom("expression".into())) } fn examples(&self) -> Vec { vec![Example { description: "Creates and alias expression", - example: "(dfr col a | df as new_a)", - result: None, + example: "dfr col a | dfr as new_a | dfr as-nu", + result: { + let cols = vec!["expr".into(), "value".into()]; + let expr = Value::test_string("column"); + let value = Value::test_string("a"); + let expr = Value::Record { + cols, + vals: vec![expr, value], + span: Span::test_data(), + }; + + let cols = vec!["expr".into(), "alias".into()]; + let value = Value::test_string("new_a"); + + let record = Value::Record { + cols, + vals: vec![expr, value], + span: Span::test_data(), + }; + + Some(record) + }, }] } @@ -55,3 +75,20 @@ impl Command for ExprAlias { )) } } + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + use crate::dataframe::expressions::ExprAsNu; + use crate::dataframe::expressions::ExprCol; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new(ExprAlias {}), + Box::new(ExprCol {}), + Box::new(ExprAsNu {}), + ]) + } +} diff --git a/crates/nu-command/src/dataframe/expressions/to_nu.rs b/crates/nu-command/src/dataframe/expressions/as_nu.rs similarity index 89% rename from crates/nu-command/src/dataframe/expressions/to_nu.rs rename to crates/nu-command/src/dataframe/expressions/as_nu.rs index c241b1f1c7..54fd9f498f 100644 --- a/crates/nu-command/src/dataframe/expressions/to_nu.rs +++ b/crates/nu-command/src/dataframe/expressions/as_nu.rs @@ -7,11 +7,11 @@ use nu_protocol::{ }; #[derive(Clone)] -pub struct ExprToNu; +pub struct ExprAsNu; -impl Command for ExprToNu { +impl Command for ExprAsNu { fn name(&self) -> &str { - "dfr to-nu" + "dfr as-nu" } fn usage(&self) -> &str { @@ -19,13 +19,13 @@ impl Command for ExprToNu { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("expressions".into())) + Signature::build(self.name()).category(Category::Custom("expression".into())) } fn examples(&self) -> Vec { vec![Example { description: "Convert a col expression into a nushell value", - example: "dfr col col_a | dfr to-nu", + example: "dfr col col_a | dfr as-nu", result: Some(Value::Record { cols: vec!["expr".into(), "value".into()], vals: vec![ @@ -65,6 +65,6 @@ mod test { #[test] fn test_examples() { - test_dataframe(vec![Box::new(ExprToNu {}), Box::new(ExprCol {})]) + test_dataframe(vec![Box::new(ExprAsNu {}), Box::new(ExprCol {})]) } } diff --git a/crates/nu-command/src/dataframe/expressions/dsl/col.rs b/crates/nu-command/src/dataframe/expressions/col.rs similarity index 85% rename from crates/nu-command/src/dataframe/expressions/dsl/col.rs rename to crates/nu-command/src/dataframe/expressions/col.rs index 569c4f1d7f..4bd783e351 100644 --- a/crates/nu-command/src/dataframe/expressions/dsl/col.rs +++ b/crates/nu-command/src/dataframe/expressions/col.rs @@ -26,13 +26,13 @@ impl Command for ExprCol { SyntaxShape::String, "Name of column to be used", ) - .category(Category::Custom("expressions".into())) + .category(Category::Custom("expression".into())) } fn examples(&self) -> Vec { vec![Example { description: "Creates a named column expression and converts it to a nu object", - example: "dfr col col_a | dfr to-nu", + example: "dfr col col_a | dfr as-nu", result: Some(Value::Record { cols: vec!["expr".into(), "value".into()], vals: vec![ @@ -66,12 +66,12 @@ impl Command for ExprCol { #[cfg(test)] mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::super::super::ExprToNu; + use super::super::super::test_dataframe::test_dataframe; use super::*; + use crate::dataframe::expressions::as_nu::ExprAsNu; #[test] fn test_examples() { - test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprToNu {})]) + test_dataframe(vec![Box::new(ExprCol {}), Box::new(ExprAsNu {})]) } } diff --git a/crates/nu-command/src/dataframe/expressions/dsl/mod.rs b/crates/nu-command/src/dataframe/expressions/dsl/mod.rs deleted file mode 100644 index b31e6726a8..0000000000 --- a/crates/nu-command/src/dataframe/expressions/dsl/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod col; -mod lit; -mod when; - -pub(super) use crate::dataframe::expressions::dsl::col::ExprCol; -pub(super) use crate::dataframe::expressions::dsl::lit::ExprLit; -pub(super) use crate::dataframe::expressions::dsl::when::ExprWhen; diff --git a/crates/nu-command/src/dataframe/expressions/dsl/when.rs b/crates/nu-command/src/dataframe/expressions/dsl/when.rs deleted file mode 100644 index fe58959189..0000000000 --- a/crates/nu-command/src/dataframe/expressions/dsl/when.rs +++ /dev/null @@ -1,96 +0,0 @@ -use crate::dataframe::values::NuExpression; -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, -}; -use polars::prelude::when; - -#[derive(Clone)] -pub struct ExprWhen; - -impl Command for ExprWhen { - fn name(&self) -> &str { - "dfr when" - } - - fn usage(&self) -> &str { - "Creates a when expression" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "when predicate", - SyntaxShape::Any, - "Name of column to be used", - ) - .required_named( - "then", - SyntaxShape::Any, - "Expression that will be applied when predicate is true", - Some('t'), - ) - .required_named( - "otherwise", - SyntaxShape::Any, - "Expression that will be applied when predicate is false", - Some('o'), - ) - .category(Category::Custom("expressions".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Create a new column for the dataframe", - example: r#"[[a b]; [1 2] [3 4]] - | dfr to-df - | dfr to-lazy - | dfr with-column ( - dfr when ((dfr col a) > 2) --then 4 --otherwise 5 | dfr as "c" - ) - | dfr collect"#, - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - let predicate: Value = call.req(engine_state, stack, 0)?; - let predicate = NuExpression::try_from_value(predicate)?; - - let then: Value = call - .get_flag(engine_state, stack, "then")? - .expect("it is a required named value"); - let then = NuExpression::try_from_value(then)?; - let otherwise: Value = call - .get_flag(engine_state, stack, "otherwise")? - .expect("it is a required named value"); - let otherwise = NuExpression::try_from_value(otherwise)?; - - let expr: NuExpression = when(predicate.into_polars()) - .then(then.into_polars()) - .otherwise(otherwise.into_polars()) - .into(); - - Ok(PipelineData::Value(expr.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::super::super::ExprToNu; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ExprWhen {}), Box::new(ExprToNu {})]) - } -} diff --git a/crates/nu-command/src/dataframe/expressions/expressions_macro.rs b/crates/nu-command/src/dataframe/expressions/expressions_macro.rs index 26b9609edd..17149eec50 100644 --- a/crates/nu-command/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu-command/src/dataframe/expressions/expressions_macro.rs @@ -26,7 +26,7 @@ macro_rules! expr_command { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("expression".into())) } fn examples(&self) -> Vec { @@ -107,3 +107,17 @@ expr_command!( }], explode ); + +// ExprCount command +// Expands to a command definition for a count expression +expr_command!( + ExprCount, + "dfr count", + "creates a count expression", + vec![Example { + description: "", + example: "", + result: None, + }], + count +); diff --git a/crates/nu-command/src/dataframe/expressions/dsl/lit.rs b/crates/nu-command/src/dataframe/expressions/lit.rs similarity index 86% rename from crates/nu-command/src/dataframe/expressions/dsl/lit.rs rename to crates/nu-command/src/dataframe/expressions/lit.rs index a631f3b437..3fe9647abc 100644 --- a/crates/nu-command/src/dataframe/expressions/dsl/lit.rs +++ b/crates/nu-command/src/dataframe/expressions/lit.rs @@ -25,13 +25,13 @@ impl Command for ExprLit { SyntaxShape::Any, "literal to construct the expression", ) - .category(Category::Custom("expressions".into())) + .category(Category::Custom("expression".into())) } fn examples(&self) -> Vec { vec![Example { description: "Created a literal expression and converts it to a nu object", - example: "dfr lit 2 | dfr to-nu", + example: "dfr lit 2 | dfr as-nu", result: Some(Value::Record { cols: vec!["expr".into(), "value".into()], vals: vec![ @@ -68,12 +68,12 @@ impl Command for ExprLit { #[cfg(test)] mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::super::super::ExprToNu; + use super::super::super::test_dataframe::test_dataframe; use super::*; + use crate::dataframe::expressions::as_nu::ExprAsNu; #[test] fn test_examples() { - test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprToNu {})]) + test_dataframe(vec![Box::new(ExprLit {}), Box::new(ExprAsNu {})]) } } diff --git a/crates/nu-command/src/dataframe/expressions/mod.rs b/crates/nu-command/src/dataframe/expressions/mod.rs index ed1338f432..63d2f58542 100644 --- a/crates/nu-command/src/dataframe/expressions/mod.rs +++ b/crates/nu-command/src/dataframe/expressions/mod.rs @@ -1,15 +1,20 @@ mod alias; -mod dsl; +mod as_nu; +mod col; mod expressions_macro; -mod to_nu; +mod lit; +mod otherwise; +mod when; use nu_protocol::engine::StateWorkingSet; -use crate::dataframe::expressions::dsl::*; - -use crate::dataframe::expressions::alias::ExprAlias; -use crate::dataframe::expressions::expressions_macro::*; -use crate::dataframe::expressions::to_nu::ExprToNu; +pub(crate) use crate::dataframe::expressions::alias::ExprAlias; +use crate::dataframe::expressions::as_nu::ExprAsNu; +pub(super) use crate::dataframe::expressions::col::ExprCol; +pub(crate) use crate::dataframe::expressions::expressions_macro::*; +pub(super) use crate::dataframe::expressions::lit::ExprLit; +pub(super) use crate::dataframe::expressions::otherwise::ExprOtherwise; +pub(super) use crate::dataframe::expressions::when::ExprWhen; pub fn add_expressions(working_set: &mut StateWorkingSet) { macro_rules! bind_command { @@ -25,9 +30,11 @@ pub fn add_expressions(working_set: &mut StateWorkingSet) { bind_command!( ExprAlias, ExprCol, + ExprCount, ExprLit, - ExprToNu, + ExprAsNu, ExprWhen, + ExprOtherwise, ExprList, ExprAggGroups, ExprFlatten, diff --git a/crates/nu-command/src/dataframe/expressions/otherwise.rs b/crates/nu-command/src/dataframe/expressions/otherwise.rs new file mode 100644 index 0000000000..df3f2210aa --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/otherwise.rs @@ -0,0 +1,125 @@ +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; + +#[derive(Clone)] +pub struct ExprOtherwise; + +impl Command for ExprOtherwise { + fn name(&self) -> &str { + "dfr otherwise" + } + + fn usage(&self) -> &str { + "completes a when expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "otherwise expression", + SyntaxShape::Any, + "expressioini to apply when no when predicate matches", + ) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create a when conditions", + example: "dfr when ((dfr col a) > 2) 4 | dfr otherwise 5", + result: None, + }, + Example { + description: "Create a when conditions", + example: + "dfr when ((dfr col a) > 2) 4 | dfr when ((dfr col a) < 0) 6 | dfr otherwise 0", + result: None, + }, + Example { + description: "Create a new column for the dataframe", + example: r#"[[a b]; [6 2] [1 4] [4 1]] + | dfr to-lazy + | dfr with-column ( + dfr when ((dfr col a) > 2) 4 | dfr otherwise 5 | dfr as c + ) + | dfr with-column ( + dfr when ((dfr col a) > 5) 10 | dfr when ((dfr col a) < 2) 6 | dfr otherwise 0 | dfr as d + ) + | dfr collect"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let otherwise_predicate: Value = call.req(engine_state, stack, 0)?; + let otherwise_predicate = NuExpression::try_from_value(otherwise_predicate)?; + + let value = input.into_value(call.head); + let complete: NuExpression = match NuWhen::try_from_value(value)? { + NuWhen::WhenThen(when_then) => when_then + .otherwise(otherwise_predicate.into_polars()) + .into(), + NuWhen::WhenThenThen(when_then_then) => when_then_then + .otherwise(otherwise_predicate.into_polars()) + .into(), + }; + + Ok(PipelineData::Value(complete.into_value(call.head), None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use crate::dataframe::eager::WithColumn; + use crate::dataframe::expressions::when::ExprWhen; + use crate::dataframe::expressions::{ExprAlias, ExprAsNu, ExprCol}; + + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new(WithColumn {}), + Box::new(ExprCol {}), + Box::new(ExprAlias {}), + Box::new(ExprWhen {}), + Box::new(ExprOtherwise {}), + Box::new(ExprAsNu {}), + ]) + } +} diff --git a/crates/nu-command/src/dataframe/expressions/when.rs b/crates/nu-command/src/dataframe/expressions/when.rs new file mode 100644 index 0000000000..722d3d3699 --- /dev/null +++ b/crates/nu-command/src/dataframe/expressions/when.rs @@ -0,0 +1,140 @@ +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, +}; +use polars::prelude::when; + +#[derive(Clone)] +pub struct ExprWhen; + +impl Command for ExprWhen { + fn name(&self) -> &str { + "dfr when" + } + + fn usage(&self) -> &str { + "Creates and modifies a when expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "when expression", + SyntaxShape::Any, + "when expression used for matching", + ) + .required( + "then expression", + SyntaxShape::Any, + "expression that will be applied when predicate is true", + ) + .category(Category::Custom("expression".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Create a when conditions", + example: "dfr when ((dfr col a) > 2) 4", + result: None, + }, + Example { + description: "Create a when conditions", + example: "dfr when ((dfr col a) > 2) 4 | dfr when ((dfr col a) < 0) 6", + result: None, + }, + Example { + description: "Create a new column for the dataframe", + example: r#"[[a b]; [6 2] [1 4] [4 1]] + | dfr to-lazy + | dfr with-column ( + dfr when ((dfr col a) > 2) 4 | dfr otherwise 5 | dfr as c + ) + | dfr with-column ( + dfr when ((dfr col a) > 5) 10 | dfr when ((dfr col a) < 2) 6 | dfr otherwise 0 | dfr as d + ) + | dfr collect"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], + ), + Column::new( + "c".to_string(), + vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], + ), + Column::new( + "d".to_string(), + vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let when_predicate: Value = call.req(engine_state, stack, 0)?; + let when_predicate = NuExpression::try_from_value(when_predicate)?; + + let then_predicate: Value = call.req(engine_state, stack, 1)?; + let then_predicate = NuExpression::try_from_value(then_predicate)?; + + let value = input.into_value(call.head); + let when_then: NuWhen = match value { + Value::Nothing { .. } => when(when_predicate.into_polars()) + .then(then_predicate.into_polars()) + .into(), + v => match NuWhen::try_from_value(v)? { + NuWhen::WhenThen(when_then) => when_then + .when(when_predicate.into_polars()) + .then(then_predicate.into_polars()) + .into(), + NuWhen::WhenThenThen(when_then_then) => when_then_then + .when(when_predicate.into_polars()) + .then(then_predicate.into_polars()) + .into(), + }, + }; + + Ok(PipelineData::Value(when_then.into_value(call.head), None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use crate::dataframe::eager::WithColumn; + use crate::dataframe::expressions::otherwise::ExprOtherwise; + use crate::dataframe::expressions::{ExprAlias, ExprAsNu, ExprCol}; + + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new(WithColumn {}), + Box::new(ExprCol {}), + Box::new(ExprAlias {}), + Box::new(ExprWhen {}), + Box::new(ExprOtherwise {}), + Box::new(ExprAsNu {}), + ]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/aggregate.rs b/crates/nu-command/src/dataframe/lazy/aggregate.rs index d421df9be9..957eb750e9 100644 --- a/crates/nu-command/src/dataframe/lazy/aggregate.rs +++ b/crates/nu-command/src/dataframe/lazy/aggregate.rs @@ -1,10 +1,10 @@ -use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; #[derive(Clone)] @@ -12,7 +12,7 @@ pub struct LazyAggregate; impl Command for LazyAggregate { fn name(&self) -> &str { - "dfr aggregate" + "dfr agg" } fn usage(&self) -> &str { @@ -36,26 +36,67 @@ impl Command for LazyAggregate { example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] | dfr to-df | dfr group-by a - | dfr aggregate [ + | dfr agg [ ("b" | dfr min | dfr as "b_min") ("b" | dfr max | dfr as "b_max") ("b" | dfr sum | dfr as "b_sum") ]"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, Example { description: "Group by and perform an aggregation", example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | dfr to-df | dfr to-lazy | dfr group-by a - | dfr aggregate [ + | dfr agg [ ("b" | dfr min | dfr as "b_min") ("b" | dfr max | dfr as "b_max") ("b" | dfr sum | dfr as "b_sum") ] | dfr collect"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, ] } @@ -78,14 +119,33 @@ impl Command for LazyAggregate { let from_eager = group_by.from_eager; let group_by = group_by.into_polars(); - let lazy: NuLazyFrame = group_by.agg(&expressions).into(); - - let res = if from_eager { - lazy.collect(call.head)?.into_value(call.head) - } else { - lazy.into_value(call.head) + let lazy = NuLazyFrame { + lazy: group_by.agg(&expressions).into(), + from_eager, }; + let res = lazy.into_value(call.head)?; Ok(PipelineData::Value(res, None)) } } + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + use crate::dataframe::expressions::ExprAlias; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + use crate::dataframe::lazy::{LazyMax, LazyMin, LazySum}; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + Box::new(ExprAlias {}), + Box::new(LazyMin {}), + Box::new(LazyMax {}), + Box::new(LazySum {}), + ]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/collect.rs b/crates/nu-command/src/dataframe/lazy/collect.rs index 6f93aff437..9691434edf 100644 --- a/crates/nu-command/src/dataframe/lazy/collect.rs +++ b/crates/nu-command/src/dataframe/lazy/collect.rs @@ -1,9 +1,10 @@ -use super::super::values::{NuDataFrame, NuLazyFrame}; +use crate::dataframe::values::{Column, NuDataFrame}; +use super::super::values::NuLazyFrame; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, + Category, Example, PipelineData, ShellError, Signature, Span, Value, }; #[derive(Clone)] @@ -15,7 +16,7 @@ impl Command for LazyCollect { } fn usage(&self) -> &str { - "Collect lazy dataframe into dataframe" + "Collect lazy dataframe into eager dataframe" } fn signature(&self) -> Signature { @@ -24,9 +25,22 @@ impl Command for LazyCollect { fn examples(&self) -> Vec { vec![Example { - description: "", - example: "", - result: None, + description: "drop duplicates", + example: "[[a b]; [1 2] [3 4]] | dfr to-lazy | dfr collect", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(3)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }] } @@ -39,10 +53,22 @@ impl Command for LazyCollect { ) -> Result { let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; let eager = lazy.collect(call.head)?; + let value = Value::CustomValue { + val: Box::new(eager), + span: call.head, + }; - Ok(PipelineData::Value( - NuDataFrame::into_value(eager, call.head), - None, - )) + Ok(PipelineData::Value(value, None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazyCollect {})]) } } diff --git a/crates/nu-command/src/dataframe/lazy/fetch.rs b/crates/nu-command/src/dataframe/lazy/fetch.rs index e3cabb17cd..2b0d764712 100644 --- a/crates/nu-command/src/dataframe/lazy/fetch.rs +++ b/crates/nu-command/src/dataframe/lazy/fetch.rs @@ -1,10 +1,10 @@ use super::super::values::NuLazyFrame; -use crate::dataframe::values::NuDataFrame; +use crate::dataframe::values::{Column, NuDataFrame}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; #[derive(Clone)] @@ -31,9 +31,22 @@ impl Command for LazyFetch { fn examples(&self) -> Vec { vec![Example { - description: "", - example: "", - result: None, + description: "Fetch a rows from the dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr fetch 2", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }] } @@ -68,13 +81,13 @@ impl Command for LazyFetch { } } -//#[cfg(test)] -//mod test { -// use super::super::super::test_dataframe::test_dataframe; -// use super::*; -// -// #[test] -// fn test_examples() { -// test_dataframe(vec![Box::new(LazyFetch {})]) -// } -//} +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazyFetch {})]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/fill_na.rs b/crates/nu-command/src/dataframe/lazy/fill_na.rs index 7c0adbc023..d6797a0f49 100644 --- a/crates/nu-command/src/dataframe/lazy/fill_na.rs +++ b/crates/nu-command/src/dataframe/lazy/fill_na.rs @@ -44,22 +44,23 @@ impl Command for LazyFillNA { input: PipelineData, ) -> Result { let fill: Value = call.req(engine_state, stack, 0)?; + let value = input.into_value(call.head); - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); - let expr = NuExpression::try_from_value(fill)?.into_polars(); - let lazy: NuLazyFrame = lazy.fill_nan(expr).into(); + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let fill = NuExpression::try_from_value(fill)?.into_polars(); + let expr: NuExpression = expr.into_polars().fill_nan(fill).into(); - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else { + let lazy = NuLazyFrame::try_from_value(value)?; + let expr = NuExpression::try_from_value(fill)?.into_polars(); + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().fill_nan(expr)); + + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } } } - -//#[cfg(test)] -//mod test { -// use super::super::super::test_dataframe::test_dataframe; -// use super::*; -// -// #[test] -// fn test_examples() { -// test_dataframe(vec![Box::new(LazyFillNA {})]) -// } -//} diff --git a/crates/nu-command/src/dataframe/lazy/fill_null.rs b/crates/nu-command/src/dataframe/lazy/fill_null.rs index 4fdb041a2d..8f583b54f8 100644 --- a/crates/nu-command/src/dataframe/lazy/fill_null.rs +++ b/crates/nu-command/src/dataframe/lazy/fill_null.rs @@ -1,9 +1,9 @@ -use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; #[derive(Clone)] @@ -30,9 +30,22 @@ impl Command for LazyFillNull { fn examples(&self) -> Vec { vec![Example { - description: "", - example: "", - result: None, + description: "Fills the null values by 0", + example: "[1 2 2 3 3] | dfr to-df | dfr shift 2 | dfr fill-null 0", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(0), + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(2), + ], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }] } @@ -44,22 +57,35 @@ impl Command for LazyFillNull { input: PipelineData, ) -> Result { let fill: Value = call.req(engine_state, stack, 0)?; + let value = input.into_value(call.head); - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); - let expr = NuExpression::try_from_value(fill)?.into_polars(); - let lazy: NuLazyFrame = lazy.fill_null(expr).into(); + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let fill = NuExpression::try_from_value(fill)?.into_polars(); + let expr: NuExpression = expr.into_polars().fill_null(fill).into(); - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else { + let lazy = NuLazyFrame::try_from_value(value)?; + let expr = NuExpression::try_from_value(fill)?.into_polars(); + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().fill_null(expr)); + + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } } } -//#[cfg(test)] -//mod test { -// use super::super::super::test_dataframe::test_dataframe; -// use super::*; -// -// #[test] -// fn test_examples() { -// test_dataframe(vec![Box::new(LazyFillNull {})]) -// } -//} +#[cfg(test)] +mod test { + use super::super::super::series::Shift; + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazyFillNull {}), Box::new(Shift {})]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/groupby.rs b/crates/nu-command/src/dataframe/lazy/groupby.rs index b05f5df15b..494c548e27 100644 --- a/crates/nu-command/src/dataframe/lazy/groupby.rs +++ b/crates/nu-command/src/dataframe/lazy/groupby.rs @@ -1,9 +1,9 @@ -use crate::dataframe::values::{NuExpression, NuLazyFrame, NuLazyGroupBy}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; use polars::prelude::Expr; @@ -36,26 +36,67 @@ impl Command for ToLazyGroupBy { example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] | dfr to-df | dfr group-by a - | dfr aggregate [ + | dfr agg [ ("b" | dfr min | dfr as "b_min") ("b" | dfr max | dfr as "b_max") ("b" | dfr sum | dfr as "b_sum") ]"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, Example { description: "Group by and perform an aggregation", example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] | dfr to-df - | dfr to-lazy | dfr group-by a - | dfr aggregate [ + | dfr agg [ ("b" | dfr min | dfr as "b_min") ("b" | dfr max | dfr as "b_max") ("b" | dfr sum | dfr as "b_sum") ] | dfr collect"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(2)], + ), + Column::new( + "b_min".to_string(), + vec![Value::test_int(2), Value::test_int(4)], + ), + Column::new( + "b_max".to_string(), + vec![Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b_sum".to_string(), + vec![Value::test_int(6), Value::test_int(10)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, ] } @@ -86,7 +127,8 @@ impl Command for ToLazyGroupBy { } let value = input.into_value(call.head); - let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?; + let lazy = NuLazyFrame::try_from_value(value)?; + let from_eager = lazy.from_eager; let group_by = NuLazyGroupBy { group_by: Some(lazy.into_polars().groupby(&expressions)), @@ -96,3 +138,24 @@ impl Command for ToLazyGroupBy { Ok(PipelineData::Value(group_by.into_value(call.head), None)) } } + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + use crate::dataframe::expressions::ExprAlias; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::{LazyMax, LazyMin, LazySum}; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + Box::new(ExprAlias {}), + Box::new(LazyMin {}), + Box::new(LazyMax {}), + Box::new(LazySum {}), + ]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/join.rs b/crates/nu-command/src/dataframe/lazy/join.rs index 032491b887..71eecc03f0 100644 --- a/crates/nu-command/src/dataframe/lazy/join.rs +++ b/crates/nu-command/src/dataframe/lazy/join.rs @@ -1,9 +1,9 @@ -use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; use polars::prelude::{Expr, JoinType}; @@ -46,16 +46,116 @@ impl Command for LazyJoin { Example { description: "Join two lazy dataframes", example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-lazy); - let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr to-lazy); $df_a | dfr join $df_b a foo | dfr collect"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(1), + Value::test_int(1), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + Value::test_string("c"), + ], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "bar".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("c"), + Value::test_string("a"), + Value::test_string("a"), + ], + ), + Column::new( + "ham".to_string(), + vec![ + Value::test_string("let"), + Value::test_string("var"), + Value::test_string("let"), + Value::test_string("let"), + ], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, Example { description: "Join one eager dataframe with a lazy dataframe", example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr to-df); - let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [1 "c" "var"] [1 "c" "const"]] | dfr to-lazy); + let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr to-lazy); $df_a | dfr join $df_b a foo"#, - result: None, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(1), + Value::test_int(1), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("b"), + Value::test_string("c"), + Value::test_string("c"), + ], + ), + Column::new( + "c".to_string(), + vec![ + Value::test_int(0), + Value::test_int(1), + Value::test_int(2), + Value::test_int(3), + ], + ), + Column::new( + "bar".to_string(), + vec![ + Value::test_string("a"), + Value::test_string("c"), + Value::test_string("a"), + Value::test_string("a"), + ], + ), + Column::new( + "ham".to_string(), + vec![ + Value::test_string("let"), + Value::test_string("var"), + Value::test_string("let"), + Value::test_string("let"), + ], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }, ] } @@ -82,7 +182,7 @@ impl Command for LazyJoin { }; let other: Value = call.req(engine_state, stack, 0)?; - let (other, _) = NuLazyFrame::maybe_is_eager(other)?; + let other = NuLazyFrame::try_from_value(other)?; let other = other.into_polars(); let left_on: Value = call.req(engine_state, stack, 1)?; @@ -114,10 +214,11 @@ impl Command for LazyJoin { let suffix = suffix.unwrap_or_else(|| "_x".into()); let value = input.into_value(call.head); - let (lazy, from_eager) = NuLazyFrame::maybe_is_eager(value)?; + let lazy = NuLazyFrame::try_from_value(value)?; + let from_eager = lazy.from_eager; let lazy = lazy.into_polars(); - let lazy: NuLazyFrame = lazy + let lazy = lazy .join_builder() .with(other) .left_on(left_on) @@ -125,15 +226,21 @@ impl Command for LazyJoin { .how(how) .force_parallel(true) .suffix(suffix) - .finish() - .into(); + .finish(); - let res = if from_eager { - lazy.collect(call.head)?.into_value(call.head) - } else { - lazy.into_value(call.head) - }; + let lazy = NuLazyFrame::new(from_eager, lazy); - Ok(PipelineData::Value(res, None)) + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazyJoin {})]) } } diff --git a/crates/nu-command/src/dataframe/lazy/macro_commands.rs b/crates/nu-command/src/dataframe/lazy/macro_commands.rs index 7c9bfe42f5..7e4cd613ac 100644 --- a/crates/nu-command/src/dataframe/lazy/macro_commands.rs +++ b/crates/nu-command/src/dataframe/lazy/macro_commands.rs @@ -1,15 +1,15 @@ /// Definition of multiple lazyframe commands using a macro rule /// All of these commands have an identical body and only require /// to have a change in the name, description and function -use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, + Category, Example, PipelineData, ShellError, Signature, Span, Value, }; macro_rules! lazy_command { - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { #[derive(Clone)] pub struct $command; @@ -37,10 +37,21 @@ macro_rules! lazy_command { call: &Call, input: PipelineData, ) -> Result { - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); - let lazy: NuLazyFrame = lazy.$func().into(); + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func()); - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } + } + + #[cfg(test)] + mod $test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new($command {})]) } } }; @@ -53,11 +64,25 @@ lazy_command!( "dfr reverse", "Reverses the LazyFrame", vec![Example { - description: "", - example: "", - result: None, - }], - reverse + description: "Reverses the dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr reverse", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + },], + reverse, + test_reverse ); // LazyCache command @@ -67,17 +92,18 @@ lazy_command!( "dfr cache", "Caches operations in a new LazyFrame", vec![Example { - description: "", - example: "", + description: "Caches the result into a new LazyFrame", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr reverse | dfr cache", result: None, }], - cache + cache, + test_cache ); // Creates a command that may result in a lazy frame operation or // lazy frame expression macro_rules! lazy_expr_command { - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident) => { + ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { #[derive(Clone)] pub struct $command; @@ -91,7 +117,8 @@ macro_rules! lazy_expr_command { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("lazyframe".into())) + Signature::build(self.name()) + .category(Category::Custom("lazyframe or expression".into())) } fn examples(&self) -> Vec { @@ -115,21 +142,31 @@ macro_rules! lazy_expr_command { NuExpression::into_value(expr, call.head), None, )) - } else if NuLazyFrame::can_downcast(&value) { - let lazy = NuLazyFrame::try_from_value(value)?.into_polars(); - let lazy: NuLazyFrame = lazy.$func().into(); - - Ok(PipelineData::Value(lazy.into_value(call.head), None)) } else { - Err(ShellError::CantConvert( - "expression or lazyframe".into(), - value.get_type().to_string(), - value.span()?, - None, - )) + let lazy = NuLazyFrame::try_from_value(value)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func()); + + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) } } } + + #[cfg(test)] + mod $test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new($command {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]) + } + } }; } @@ -139,12 +176,43 @@ lazy_expr_command!( LazyMax, "dfr max", "Aggregates columns to their max value or creates a max expression", - vec![Example { - description: "", - example: "", - result: None, - }], - max + vec![ + Example { + description: "Max value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr max", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(6)],), + Column::new("b".to_string(), vec![Value::test_int(4)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Max aggregation for a group by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr max)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(1)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + max, + test_max ); // LazyMin command @@ -153,12 +221,43 @@ lazy_expr_command!( LazyMin, "dfr min", "Aggregates columns to their min value or creates a min expression", - vec![Example { - description: "", - example: "", - result: None, - }], - min + vec![ + Example { + description: "Min value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr min", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(1)],), + Column::new("b".to_string(), vec![Value::test_int(1)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Min aggregation for a group by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr min)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(2), Value::test_int(1)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + min, + test_min ); // LazySum command @@ -166,13 +265,44 @@ lazy_expr_command!( lazy_expr_command!( LazySum, "dfr sum", - "Aggregates columns to their sum value or creates a sum expression", - vec![Example { - description: "", - example: "", - result: None, - }], - sum + "Aggregates columns to their sum value or creates a sum expression for an aggregation", + vec![ + Example { + description: "Sums all columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr sum", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_int(11)],), + Column::new("b".to_string(), vec![Value::test_int(7)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Sum aggregation for a group by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr sum)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(6), Value::test_int(1)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + sum, + test_sum ); // LazyMean command @@ -180,13 +310,44 @@ lazy_expr_command!( lazy_expr_command!( LazyMean, "dfr mean", - "Aggregates columns to their mean value or creates a mean expression", - vec![Example { - description: "", - example: "", - result: None, - }], - mean + "Aggregates columns to their mean value or creates a mean expression for an aggregation", + vec![ + Example { + description: "Mean value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr mean", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(2.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Mean aggregation for a group by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr mean)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + mean, + test_mean ); // LazyMedian command @@ -194,13 +355,44 @@ lazy_expr_command!( lazy_expr_command!( LazyMedian, "dfr median", - "Aggregates columns to their median value or creates a median expression", - vec![Example { - description: "", - example: "", - result: None, - }], - median + "Aggregates columns to their median value or creates a median expression for an aggregation", + vec![ + Example { + description: "Median value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr median", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(2.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Median aggregation for a group by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr median)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(3.0), Value::test_float(1.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + median, + test_median ); // LazyStd command @@ -208,13 +400,44 @@ lazy_expr_command!( lazy_expr_command!( LazyStd, "dfr std", - "Aggregates columns to their std value", - vec![Example { - description: "", - example: "", - result: None, - }], - std + "Aggregates columns to their std value or creates a std expression for an aggregation", + vec![ + Example { + description: "Std value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr std", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(2.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Std aggregation for a group by", + example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr std)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + std, + test_std ); // LazyVar command @@ -222,11 +445,42 @@ lazy_expr_command!( lazy_expr_command!( LazyVar, "dfr var", - "Aggregates columns to their var value", - vec![Example { - description: "", - example: "", - result: None, - }], - var + "Aggregates columns to their var value or create a var expression for an aggregation", + vec![ + Example { + description: "Var value from columns in a dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr var", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)],), + Column::new("b".to_string(), vec![Value::test_float(0.0)],), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Var aggregation for a group by", + example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr var)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(0.0), Value::test_float(0.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ], + var, + test_var ); diff --git a/crates/nu-command/src/dataframe/lazy/mod.rs b/crates/nu-command/src/dataframe/lazy/mod.rs index f34bc21eba..3b9548f070 100644 --- a/crates/nu-command/src/dataframe/lazy/mod.rs +++ b/crates/nu-command/src/dataframe/lazy/mod.rs @@ -13,10 +13,10 @@ mod to_lazy; use nu_protocol::engine::StateWorkingSet; -use crate::dataframe::lazy::macro_commands::*; +pub(crate) use crate::dataframe::lazy::macro_commands::*; use crate::dataframe::lazy::aggregate::LazyAggregate; -use crate::dataframe::lazy::collect::LazyCollect; +pub use crate::dataframe::lazy::collect::LazyCollect; use crate::dataframe::lazy::fetch::LazyFetch; use crate::dataframe::lazy::fill_na::LazyFillNA; use crate::dataframe::lazy::fill_null::LazyFillNull; @@ -25,7 +25,7 @@ use crate::dataframe::lazy::join::LazyJoin; use crate::dataframe::lazy::quantile::LazyQuantile; use crate::dataframe::lazy::select::LazySelect; use crate::dataframe::lazy::sort_by_expr::LazySortBy; -use crate::dataframe::lazy::to_lazy::ToLazyFrame; +pub use crate::dataframe::lazy::to_lazy::ToLazyFrame; pub fn add_lazy_decls(working_set: &mut StateWorkingSet) { macro_rules! bind_command { diff --git a/crates/nu-command/src/dataframe/lazy/quantile.rs b/crates/nu-command/src/dataframe/lazy/quantile.rs index 067ba4602c..6726ab56ce 100644 --- a/crates/nu-command/src/dataframe/lazy/quantile.rs +++ b/crates/nu-command/src/dataframe/lazy/quantile.rs @@ -1,9 +1,9 @@ -use crate::dataframe::values::NuLazyFrame; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; use polars::prelude::QuantileInterpolOptions; @@ -30,11 +30,41 @@ impl Command for LazyQuantile { } fn examples(&self) -> Vec { - vec![Example { - description: "", - example: "", - result: None, - }] + vec![ + Example { + description: "quantile value from columns in a dataframe", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr quantile 0.5", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new("a".to_string(), vec![Value::test_float(4.0)]), + Column::new("b".to_string(), vec![Value::test_float(2.0)]), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Quantile aggregation for a group by", + example: r#"[[a b]; [one 2] [one 4] [two 1]] + | dfr to-df + | dfr group-by a + | dfr agg ("b" | dfr quantile 0.5)"#, + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_string("one"), Value::test_string("two")], + ), + Column::new( + "b".to_string(), + vec![Value::test_float(4.0), Value::test_float(1.0)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] } fn run( @@ -44,24 +74,46 @@ impl Command for LazyQuantile { call: &Call, input: PipelineData, ) -> Result { + let value = input.into_value(call.head); let quantile: f64 = call.req(engine_state, stack, 0)?; - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); - let lazy: NuLazyFrame = lazy - .quantile(quantile, QuantileInterpolOptions::default()) - .into(); + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let expr: NuExpression = expr + .into_polars() + .quantile(quantile, QuantileInterpolOptions::default()) + .into(); - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else { + let lazy = NuLazyFrame::try_from_value(value)?; + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.into_polars() + .quantile(quantile, QuantileInterpolOptions::default()), + ); + + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) + } } } -//#[cfg(test)] -//mod test { -// use super::super::super::test_dataframe::test_dataframe; -// use super::*; -// -// #[test] -// fn test_examples() { -// test_dataframe(vec![Box::new(LazyQuantile {})]) -// } -//} +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + use crate::dataframe::lazy::aggregate::LazyAggregate; + use crate::dataframe::lazy::groupby::ToLazyGroupBy; + + #[test] + fn test_examples() { + test_dataframe(vec![ + Box::new(LazyQuantile {}), + Box::new(LazyAggregate {}), + Box::new(ToLazyGroupBy {}), + ]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/select.rs b/crates/nu-command/src/dataframe/lazy/select.rs index 31dd3e5607..f336e46658 100644 --- a/crates/nu-command/src/dataframe/lazy/select.rs +++ b/crates/nu-command/src/dataframe/lazy/select.rs @@ -1,10 +1,10 @@ -use crate::dataframe::values::{NuExpression, NuLazyFrame}; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; use polars::prelude::Expr; @@ -22,7 +22,7 @@ impl Command for LazySelect { fn signature(&self) -> Signature { Signature::build(self.name()) - .required( + .rest( "select expressions", SyntaxShape::Any, "Expression(s) that define the column selection", @@ -32,9 +32,16 @@ impl Command for LazySelect { fn examples(&self) -> Vec { vec![Example { - description: "", - example: "", - result: None, + description: "Select a column from the dataframe", + example: "[[a b]; [6 2] [4 2] [2 2]] | dfr to-df | dfr select a", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "a".to_string(), + vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), }] } @@ -45,7 +52,11 @@ impl Command for LazySelect { call: &Call, input: PipelineData, ) -> Result { - let value: Value = call.req(engine_state, stack, 0)?; + let vals: Vec = call.rest(engine_state, stack, 0)?; + let value = Value::List { + vals, + span: call.head, + }; let expressions = NuExpression::extract_exprs(value)?; if expressions @@ -59,20 +70,20 @@ impl Command for LazySelect { )); } - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?.into_polars(); - let lazy: NuLazyFrame = lazy.select(&expressions).into(); + let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().select(&expressions)); - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) } } -//#[cfg(test)] -//mod test { -// use super::super::super::test_dataframe::test_dataframe; -// use super::*; -// -// #[test] -// fn test_examples() { -// test_dataframe(vec![Box::new(LazySelect {})]) -// } -//} +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazySelect {})]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs b/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs index 257d8d0aac..f11d5db371 100644 --- a/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs +++ b/crates/nu-command/src/dataframe/lazy/sort_by_expr.rs @@ -1,10 +1,10 @@ use super::super::values::NuLazyFrame; -use crate::dataframe::values::NuExpression; +use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; use nu_engine::CallExt; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Value, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, }; #[derive(Clone)] @@ -21,26 +21,70 @@ impl Command for LazySortBy { fn signature(&self) -> Signature { Signature::build(self.name()) - .required( - "filter expression", + .rest( + "sort expression", SyntaxShape::Any, - "filtering expression", + "sort expression for the dataframe", ) .named( "reverse", SyntaxShape::List(Box::new(SyntaxShape::Boolean)), - "list indicating if reverse search should be done in the column. Default is false", + "Reverse sorting. Default is false", Some('r'), ) .category(Category::Custom("lazyframe".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "", - example: "", - result: None, - }] + vec![ + Example { + description: "Sort dataframe by one column", + example: "[[a b]; [6 2] [1 4] [4 1]] | dfr to-df | dfr sort-by a", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Sort column using two columns", + example: + "[[a b]; [6 2] [1 1] [1 4] [2 4]] | dfr to-df | dfr sort-by [a b] -r [false true]", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![ + Value::test_int(1), + Value::test_int(1), + Value::test_int(2), + Value::test_int(6), + ], + ), + Column::new( + "b".to_string(), + vec![ + Value::test_int(4), + Value::test_int(1), + Value::test_int(4), + Value::test_int(2), + ], + ), + ]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] } fn run( @@ -50,7 +94,11 @@ impl Command for LazySortBy { call: &Call, input: PipelineData, ) -> Result { - let value: Value = call.req(engine_state, stack, 0)?; + let vals: Vec = call.rest(engine_state, stack, 0)?; + let value = Value::List { + vals, + span: call.head, + }; let expressions = NuExpression::extract_exprs(value)?; let reverse: Option> = call.get_flag(engine_state, stack, "reverse")?; @@ -76,25 +124,25 @@ impl Command for LazySortBy { }; let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let lazy: NuLazyFrame = lazy - .into_polars() - .sort_by_exprs(&expressions, reverse) - .into(); + let lazy = NuLazyFrame::new( + lazy.from_eager, + lazy.into_polars().sort_by_exprs(&expressions, reverse), + ); Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head), + NuLazyFrame::into_value(lazy, call.head)?, None, )) } } -//#[cfg(test)] -//mod test { -// use super::super::super::test_dataframe::test_dataframe; -// use super::*; -// -// #[test] -// fn test_examples() { -// test_dataframe(vec![Box::new(LazySortBy {})]) -// } -//} +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazySortBy {})]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/to_lazy.rs b/crates/nu-command/src/dataframe/lazy/to_lazy.rs index 434f761230..4e0158036a 100644 --- a/crates/nu-command/src/dataframe/lazy/to_lazy.rs +++ b/crates/nu-command/src/dataframe/lazy/to_lazy.rs @@ -3,7 +3,7 @@ use super::super::values::{NuDataFrame, NuLazyFrame}; use nu_protocol::{ ast::Call, engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, + Category, Example, PipelineData, ShellError, Signature, Value, }; #[derive(Clone)] @@ -39,7 +39,11 @@ impl Command for ToLazyFrame { ) -> Result { let df = NuDataFrame::try_from_iter(input.into_iter())?; let lazy = NuLazyFrame::from_dataframe(df); + let value = Value::CustomValue { + val: Box::new(lazy), + span: call.head, + }; - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value(value, None)) } } diff --git a/crates/nu-command/src/dataframe/series/masks/is_not_null.rs b/crates/nu-command/src/dataframe/series/masks/is_not_null.rs index 446f31462f..a763df1a5d 100644 --- a/crates/nu-command/src/dataframe/series/masks/is_not_null.rs +++ b/crates/nu-command/src/dataframe/series/masks/is_not_null.rs @@ -20,7 +20,7 @@ impl Command for IsNotNull { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe or lazyframe".into())) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/dataframe/series/masks/is_null.rs b/crates/nu-command/src/dataframe/series/masks/is_null.rs index 008b90a363..6f9afbfad3 100644 --- a/crates/nu-command/src/dataframe/series/masks/is_null.rs +++ b/crates/nu-command/src/dataframe/series/masks/is_null.rs @@ -20,7 +20,7 @@ impl Command for IsNull { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe or expression".into())) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/dataframe/series/masks/not.rs b/crates/nu-command/src/dataframe/series/masks/not.rs index 72877383a1..d45b560adc 100644 --- a/crates/nu-command/src/dataframe/series/masks/not.rs +++ b/crates/nu-command/src/dataframe/series/masks/not.rs @@ -22,7 +22,7 @@ impl Command for NotSeries { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe or lazyframes".into())) } fn examples(&self) -> Vec { @@ -45,7 +45,7 @@ impl Command for NotSeries { }, Example { description: "Creates a not expression from a column", - example: "dfr col a | dfr not", + example: "((dfr col a) > 2) | dfr not", result: None, }, ] diff --git a/crates/nu-command/src/dataframe/series/mod.rs b/crates/nu-command/src/dataframe/series/mod.rs index d55aeaccd2..e1b9bc1087 100644 --- a/crates/nu-command/src/dataframe/series/mod.rs +++ b/crates/nu-command/src/dataframe/series/mod.rs @@ -17,7 +17,6 @@ mod arg_min; mod cumulative; mod n_null; mod n_unique; -mod rename; mod rolling; mod shift; mod unique; @@ -32,7 +31,6 @@ pub use arg_min::ArgMin; pub use cumulative::Cumulative; pub use n_null::NNull; pub use n_unique::NUnique; -pub use rename::Rename; pub use rolling::Rolling; pub use shift::Shift; pub use unique::Unique; @@ -80,7 +78,6 @@ pub fn add_series_decls(working_set: &mut StateWorkingSet) { NNull, NUnique, NotSeries, - Rename, Replace, ReplaceAll, Rolling, diff --git a/crates/nu-command/src/dataframe/series/n_unique.rs b/crates/nu-command/src/dataframe/series/n_unique.rs index 1105b62864..bc642c91e8 100644 --- a/crates/nu-command/src/dataframe/series/n_unique.rs +++ b/crates/nu-command/src/dataframe/series/n_unique.rs @@ -11,7 +11,7 @@ pub struct NUnique; impl Command for NUnique { fn name(&self) -> &str { - "dfr count-unique" + "dfr n-unique" } fn usage(&self) -> &str { @@ -19,14 +19,14 @@ impl Command for NUnique { } fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) + Signature::build(self.name()).category(Category::Custom("dataframe or expression".into())) } fn examples(&self) -> Vec { vec![ Example { description: "Counts unique values", - example: "[1 1 2 2 3 3 4] | dfr to-df | dfr count-unique", + example: "[1 1 2 2 3 3 4] | dfr to-df | dfr n-unique", result: Some( NuDataFrame::try_from_columns(vec![Column::new( "count_unique".to_string(), diff --git a/crates/nu-command/src/dataframe/series/rename.rs b/crates/nu-command/src/dataframe/series/rename.rs deleted file mode 100644 index 9de4d86a3c..0000000000 --- a/crates/nu-command/src/dataframe/series/rename.rs +++ /dev/null @@ -1,84 +0,0 @@ -use super::super::values::{Column, NuDataFrame}; - -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Value, -}; - -#[derive(Clone)] -pub struct Rename; - -impl Command for Rename { - fn name(&self) -> &str { - "dfr rename" - } - - fn usage(&self) -> &str { - "Renames a series" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("name", SyntaxShape::String, "new series name") - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Renames a series", - example: "[5 6 7 8] | dfr to-df | dfr rename new_name", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "new_name".to_string(), - vec![ - Value::test_int(5), - Value::test_int(6), - Value::test_int(7), - Value::test_int(8), - ], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let name: String = call.req(engine_state, stack, 0)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let mut series = df.as_series(call.head)?; - series.rename(&name); - - NuDataFrame::try_from_series(vec![series], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Rename {})]) - } -} diff --git a/crates/nu-command/src/dataframe/series/shift.rs b/crates/nu-command/src/dataframe/series/shift.rs index 78bf52c920..6c231a520a 100644 --- a/crates/nu-command/src/dataframe/series/shift.rs +++ b/crates/nu-command/src/dataframe/series/shift.rs @@ -27,10 +27,10 @@ impl Command for Shift { .named( "fill", SyntaxShape::Any, - "Expression to use to fill the null values (lazy df)", + "Expression used to fill the null values (lazy df)", Some('f'), ) - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or lazyframe".into())) } fn examples(&self) -> Vec { @@ -106,7 +106,7 @@ fn command_lazy( None => lazy.shift(shift).into(), }; - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/series/unique.rs b/crates/nu-command/src/dataframe/series/unique.rs index 1b849e3989..4a2715ef9d 100644 --- a/crates/nu-command/src/dataframe/series/unique.rs +++ b/crates/nu-command/src/dataframe/series/unique.rs @@ -40,22 +40,29 @@ impl Command for Unique { "Keep the same order as the original DataFrame (lazy df)", Some('k'), ) - .category(Category::Custom("dataframe".into())) + .category(Category::Custom("dataframe or expression".into())) } fn examples(&self) -> Vec { - vec![Example { - description: "Returns unique values from a series", - example: "[2 2 2 2 2] | dfr to-df | dfr unique", - result: Some( - NuDataFrame::try_from_columns(vec![Column::new( - "0".to_string(), - vec![Value::test_int(2)], - )]) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] + vec![ + Example { + description: "Returns unique values from a series", + example: "[2 2 2 2 2] | dfr to-df | dfr unique", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![Value::test_int(2)], + )]) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Creates a is unique expression from a column", + example: "dfr col a | dfr unique", + result: None, + }, + ] } fn run( @@ -134,7 +141,7 @@ fn command_lazy( lazy.unique_stable(subset, strategy).into() }; - Ok(PipelineData::Value(lazy.into_value(call.head), None)) + Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) } #[cfg(test)] diff --git a/crates/nu-command/src/dataframe/test_dataframe.rs b/crates/nu-command/src/dataframe/test_dataframe.rs index d855367bb3..3c22cc3dce 100644 --- a/crates/nu-command/src/dataframe/test_dataframe.rs +++ b/crates/nu-command/src/dataframe/test_dataframe.rs @@ -6,6 +6,7 @@ use nu_protocol::{ }; use super::eager::ToDataFrame; +use super::lazy::{LazyCollect, ToLazyFrame}; use crate::Let; pub fn test_dataframe(cmds: Vec>) { @@ -23,6 +24,8 @@ pub fn test_dataframe(cmds: Vec>) { let mut working_set = StateWorkingSet::new(&*engine_state); working_set.add_decl(Box::new(Let)); working_set.add_decl(Box::new(ToDataFrame)); + working_set.add_decl(Box::new(ToLazyFrame)); + working_set.add_decl(Box::new(LazyCollect)); // Adding the command that is being tested to the working set for cmd in cmds { diff --git a/crates/nu-command/src/dataframe/values/mod.rs b/crates/nu-command/src/dataframe/values/mod.rs index e4f11222fd..1a5c54a296 100644 --- a/crates/nu-command/src/dataframe/values/mod.rs +++ b/crates/nu-command/src/dataframe/values/mod.rs @@ -2,9 +2,11 @@ mod nu_dataframe; mod nu_expression; mod nu_lazyframe; mod nu_lazygroupby; +mod nu_when; pub mod utils; pub use nu_dataframe::{Axis, Column, NuDataFrame}; pub use nu_expression::NuExpression; pub use nu_lazyframe::NuLazyFrame; pub use nu_lazygroupby::NuLazyGroupBy; +pub use nu_when::NuWhen; diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs index 7ae745c939..bc659dd884 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/conversion.rs @@ -715,7 +715,7 @@ pub fn from_parsed_columns(column_values: ColumnMap) -> Result Value { - let cloned = NuDataFrame(self.0.clone()); + let cloned = NuDataFrame { + df: self.df.clone(), + from_lazy: false, + }; Value::CustomValue { val: Box::new(cloned), diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs index 72fffc2a00..60772fe8da 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs @@ -8,11 +8,11 @@ pub use operations::Axis; use indexmap::map::IndexMap; use nu_protocol::{did_you_mean, PipelineData, ShellError, Span, Value}; -use polars::prelude::{DataFrame, DataType, PolarsObject, Series}; +use polars::prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series}; use serde::{Deserialize, Serialize}; use std::{cmp::Ordering, fmt::Display, hash::Hasher}; -use super::utils::DEFAULT_ROWS; +use super::{utils::DEFAULT_ROWS, NuLazyFrame}; // DataFrameValue is an encapsulation of Nushell Value that can be used // to define the PolarsObject Trait. The polars object trait allows to @@ -70,29 +70,39 @@ impl PolarsObject for DataFrameValue { } #[derive(Debug, Serialize, Deserialize)] -pub struct NuDataFrame(DataFrame); +pub struct NuDataFrame { + pub df: DataFrame, + pub from_lazy: bool, +} impl AsRef for NuDataFrame { fn as_ref(&self) -> &polars::prelude::DataFrame { - &self.0 + &self.df } } impl AsMut for NuDataFrame { fn as_mut(&mut self) -> &mut polars::prelude::DataFrame { - &mut self.0 + &mut self.df } } impl From for NuDataFrame { - fn from(dataframe: DataFrame) -> Self { - Self(dataframe) + fn from(df: DataFrame) -> Self { + Self { + df, + from_lazy: false, + } } } impl NuDataFrame { - pub fn new(dataframe: DataFrame) -> Self { - Self(dataframe) + pub fn new(from_lazy: bool, df: DataFrame) -> Self { + Self { df, from_lazy } + } + + pub fn lazy(&self) -> LazyFrame { + self.df.clone().lazy() } fn default_value(span: Span) -> Value { @@ -102,15 +112,23 @@ impl NuDataFrame { pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value { Value::CustomValue { - val: Box::new(Self::new(dataframe)), + val: Box::new(Self::new(false, dataframe)), span, } } pub fn into_value(self, span: Span) -> Value { - Value::CustomValue { - val: Box::new(self), - span, + if self.from_lazy { + let lazy = NuLazyFrame::from_dataframe(self); + Value::CustomValue { + val: Box::new(lazy), + span, + } + } else { + Value::CustomValue { + val: Box::new(self), + span, + } } } @@ -170,7 +188,7 @@ impl NuDataFrame { ) })?; - Ok(Self::new(dataframe)) + Ok(Self::new(false, dataframe)) } pub fn try_from_columns(columns: Vec) -> Result { @@ -187,9 +205,30 @@ impl NuDataFrame { } pub fn try_from_value(value: Value) -> Result { + if Self::can_downcast(&value) { + Ok(Self::get_df(value)?) + } else if NuLazyFrame::can_downcast(&value) { + let span = value.span()?; + let lazy = NuLazyFrame::try_from_value(value)?; + let df = lazy.collect(span)?; + Ok(df) + } else { + Err(ShellError::CantConvert( + "lazy or eager dataframe".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } + } + + pub fn get_df(value: Value) -> Result { match value { Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { - Some(df) => Ok(NuDataFrame(df.0.clone())), + Some(df) => Ok(NuDataFrame { + df: df.df.clone(), + from_lazy: false, + }), None => Err(ShellError::CantConvert( "dataframe".into(), "non-dataframe".into(), @@ -220,9 +259,9 @@ impl NuDataFrame { } pub fn column(&self, column: &str, span: Span) -> Result { - let s = self.0.column(column).map_err(|_| { + let s = self.df.column(column).map_err(|_| { let possibilities = self - .0 + .df .get_column_names() .iter() .map(|name| name.to_string()) @@ -232,7 +271,7 @@ impl NuDataFrame { ShellError::DidYouMean(option, span) })?; - let dataframe = DataFrame::new(vec![s.clone()]).map_err(|e| { + let df = DataFrame::new(vec![s.clone()]).map_err(|e| { ShellError::GenericError( "Error creating dataframe".into(), e.to_string(), @@ -242,11 +281,14 @@ impl NuDataFrame { ) })?; - Ok(Self(dataframe)) + Ok(Self { + df, + from_lazy: false, + }) } pub fn is_series(&self) -> bool { - self.0.width() == 1 + self.df.width() == 1 } pub fn as_series(&self, span: Span) -> Result { @@ -261,7 +303,7 @@ impl NuDataFrame { } let series = self - .0 + .df .get_columns() .get(0) .expect("We have already checked that the width is 1"); @@ -286,7 +328,7 @@ impl NuDataFrame { // Print is made out a head and if the dataframe is too large, then a tail pub fn print(&self, span: Span) -> Result, ShellError> { - let df = &self.0; + let df = &self.df; let size: usize = 20; if df.height() > size { @@ -305,7 +347,7 @@ impl NuDataFrame { } pub fn height(&self) -> usize { - self.0.height() + self.df.height() } pub fn head(&self, rows: Option, span: Span) -> Result, ShellError> { @@ -316,7 +358,7 @@ impl NuDataFrame { } pub fn tail(&self, rows: Option, span: Span) -> Result, ShellError> { - let df = &self.0; + let df = &self.df; let to_row = df.height(); let size = rows.unwrap_or(DEFAULT_ROWS); let from_row = to_row.saturating_sub(size); @@ -332,12 +374,12 @@ impl NuDataFrame { to_row: usize, span: Span, ) -> Result, ShellError> { - let df = &self.0; + let df = &self.df; let upper_row = to_row.min(df.height()); let mut size: usize = 0; let columns = self - .0 + .df .get_columns() .iter() .map( diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/operations.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/operations.rs index 30895a16e8..a35ea6479b 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/operations.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/operations.rs @@ -73,11 +73,11 @@ impl NuDataFrame { ) } _ => { - if self.0.height() != rhs.0.height() { + if self.df.height() != rhs.df.height() { return Err(ShellError::IncompatibleParameters { - left_message: format!("rows {}", self.0.height()), + left_message: format!("rows {}", self.df.height()), left_span: lhs_span, - right_message: format!("rows {}", rhs.0.height()), + right_message: format!("rows {}", rhs.df.height()), right_span: *rhs_span, }); } @@ -119,10 +119,10 @@ impl NuDataFrame { let mut columns: Vec<&str> = Vec::new(); let new_cols = self - .0 + .df .get_columns() .iter() - .chain(other.0.get_columns()) + .chain(other.df.get_columns()) .map(|s| { let name = if columns.contains(&s.name()) { format!("{}_{}", s.name(), "x") @@ -147,10 +147,10 @@ impl NuDataFrame { ) })?; - Ok(NuDataFrame::new(df_new)) + Ok(NuDataFrame::new(false, df_new)) } Axis::Column => { - if self.0.width() != other.0.width() { + if self.df.width() != other.df.width() { return Err(ShellError::IncompatibleParametersSingle( "Dataframes with different number of columns".into(), span, @@ -158,10 +158,10 @@ impl NuDataFrame { } if !self - .0 + .df .get_column_names() .iter() - .all(|col| other.0.get_column_names().contains(col)) + .all(|col| other.df.get_column_names().contains(col)) { return Err(ShellError::IncompatibleParametersSingle( "Dataframes with different columns names".into(), @@ -170,12 +170,12 @@ impl NuDataFrame { } let new_cols = self - .0 + .df .get_columns() .iter() .map(|s| { let other_col = other - .0 + .df .column(s.name()) .expect("Already checked that dataframes have same columns"); @@ -207,7 +207,7 @@ impl NuDataFrame { ) })?; - Ok(NuDataFrame::new(df_new)) + Ok(NuDataFrame::new(false, df_new)) } } } diff --git a/crates/nu-command/src/dataframe/values/nu_expression/mod.rs b/crates/nu-command/src/dataframe/values/nu_expression/mod.rs index eed8e814c6..8efa4eee32 100644 --- a/crates/nu-command/src/dataframe/values/nu_expression/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_expression/mod.rs @@ -100,6 +100,7 @@ impl NuExpression { pub fn can_downcast(value: &Value) -> bool { match value { Value::CustomValue { val, .. } => val.as_any().downcast_ref::().is_some(), + Value::List { vals, .. } => vals.iter().all(Self::can_downcast), Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. } => { true } diff --git a/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs index e968189fc9..39d723e22e 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazyframe/custom_value.rs @@ -12,7 +12,10 @@ impl CustomValue for NuLazyFrame { } fn clone_value(&self, span: nu_protocol::Span) -> Value { - let cloned = NuLazyFrame(self.0.clone()); + let cloned = NuLazyFrame { + lazy: self.lazy.clone(), + from_eager: self.from_eager, + }; Value::CustomValue { val: Box::new(cloned), diff --git a/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs index b0d7a2f9fe..8d6b5d985b 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazyframe/mod.rs @@ -10,7 +10,10 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; // Polars LazyFrame is behind and Option to allow easy implementation of // the Deserialize trait #[derive(Default)] -pub struct NuLazyFrame(Option); +pub struct NuLazyFrame { + pub lazy: Option, + pub from_eager: bool, +} // Mocked serialization of the LazyFrame object impl Serialize for NuLazyFrame { @@ -43,7 +46,7 @@ impl AsRef for NuLazyFrame { fn as_ref(&self) -> &polars::prelude::LazyFrame { // The only case when there cannot be a lazy frame is if it is created // using the default function or if created by deserializing something - self.0.as_ref().expect("there should always be a frame") + self.lazy.as_ref().expect("there should always be a frame") } } @@ -51,35 +54,56 @@ impl AsMut for NuLazyFrame { fn as_mut(&mut self) -> &mut polars::prelude::LazyFrame { // The only case when there cannot be a lazy frame is if it is created // using the default function or if created by deserializing something - self.0.as_mut().expect("there should always be a frame") + self.lazy.as_mut().expect("there should always be a frame") } } impl From for NuLazyFrame { fn from(lazy_frame: LazyFrame) -> Self { - Self(Some(lazy_frame)) + Self { + lazy: Some(lazy_frame), + from_eager: false, + } } } impl NuLazyFrame { - pub fn from_dataframe(df: NuDataFrame) -> Self { - let lazy = df.as_ref().clone().lazy(); - Self(Some(lazy)) + pub fn new(from_eager: bool, lazy: LazyFrame) -> Self { + Self { + lazy: Some(lazy), + from_eager, + } } - pub fn into_value(self, span: Span) -> Value { - Value::CustomValue { - val: Box::new(self), - span, + pub fn from_dataframe(df: NuDataFrame) -> Self { + let lazy = df.as_ref().clone().lazy(); + Self { + lazy: Some(lazy), + from_eager: true, + } + } + + pub fn into_value(self, span: Span) -> Result { + if self.from_eager { + let df = self.collect(span)?; + Ok(Value::CustomValue { + val: Box::new(df), + span, + }) + } else { + Ok(Value::CustomValue { + val: Box::new(self), + span, + }) } } pub fn into_polars(self) -> LazyFrame { - self.0.expect("lazyframe cannot be none to convert") + self.lazy.expect("lazyframe cannot be none to convert") } pub fn collect(self, span: Span) -> Result { - self.0 + self.lazy .expect("No empty lazy for collect") .collect() .map_err(|e| { @@ -91,13 +115,40 @@ impl NuLazyFrame { Vec::new(), ) }) - .map(NuDataFrame::new) + .map(|df| NuDataFrame { + df, + from_lazy: !self.from_eager, + }) } pub fn try_from_value(value: Value) -> Result { + if Self::can_downcast(&value) { + Ok(Self::get_lazy_df(value)?) + } else if NuDataFrame::can_downcast(&value) { + let df = NuDataFrame::try_from_value(value)?; + Ok(NuLazyFrame::from_dataframe(df)) + } else { + Err(ShellError::CantConvert( + "lazy or eager dataframe".into(), + value.get_type().to_string(), + value.span()?, + None, + )) + } + } + + pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { + let value = input.into_value(span); + Self::try_from_value(value) + } + + pub fn get_lazy_df(value: Value) -> Result { match value { Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { - Some(expr) => Ok(Self(expr.0.clone())), + Some(expr) => Ok(Self { + lazy: expr.lazy.clone(), + from_eager: false, + }), None => Err(ShellError::CantConvert( "lazy frame".into(), "non-dataframe".into(), @@ -114,11 +165,6 @@ impl NuLazyFrame { } } - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); - Self::try_from_value(value) - } - pub fn can_downcast(value: &Value) -> bool { if let Value::CustomValue { val, .. } = value { val.as_any().downcast_ref::().is_some() @@ -127,30 +173,17 @@ impl NuLazyFrame { } } - pub fn maybe_is_eager(value: Value) -> Result<(Self, bool), ShellError> { - if Self::can_downcast(&value) { - Ok((Self::try_from_value(value)?, false)) - } else if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - Ok((NuLazyFrame::from_dataframe(df), true)) - } else { - Err(ShellError::CantConvert( - "lazy or eager dataframe".into(), - value.get_type().to_string(), - value.span()?, - None, - )) - } - } - pub fn apply_with_expr(self, expr: NuExpression, f: F) -> Self where F: Fn(LazyFrame, Expr) -> LazyFrame, { - let df = self.0.expect("Lazy frame must not be empty to apply"); + let df = self.lazy.expect("Lazy frame must not be empty to apply"); let expr = expr.into_polars(); let new_frame = f(df, expr); - new_frame.into() + Self { + from_eager: self.from_eager, + lazy: Some(new_frame), + } } } diff --git a/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs b/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs index 1b75d6b5e3..56afda567c 100644 --- a/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_lazygroupby/mod.rs @@ -91,8 +91,8 @@ impl NuLazyGroupBy { from_eager: group.from_eager, }), None => Err(ShellError::CantConvert( - "lazy frame".into(), - "non-dataframe".into(), + "lazy groupby".into(), + "custom value".into(), span, None, )), diff --git a/crates/nu-command/src/dataframe/values/nu_when/custom_value.rs b/crates/nu-command/src/dataframe/values/nu_when/custom_value.rs new file mode 100644 index 0000000000..81980a12e0 --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_when/custom_value.rs @@ -0,0 +1,44 @@ +use super::NuWhen; +use nu_protocol::{CustomValue, ShellError, Span, Value}; + +// CustomValue implementation for NuDataFrame +impl CustomValue for NuWhen { + fn typetag_name(&self) -> &'static str { + "when" + } + + fn typetag_deserialize(&self) { + unimplemented!("typetag_deserialize") + } + + fn clone_value(&self, span: nu_protocol::Span) -> Value { + let cloned = self.clone(); + + Value::CustomValue { + val: Box::new(cloned), + span, + } + } + + fn value_string(&self) -> String { + self.typetag_name().to_string() + } + + fn to_base_value(&self, span: Span) -> Result { + let val = match self { + NuWhen::WhenThen(_) => "whenthen".into(), + NuWhen::WhenThenThen(_) => "whenthenthen".into(), + }; + + let value = Value::String { val, span }; + Ok(value) + } + + fn to_json(&self) -> nu_json::Value { + nu_json::Value::Null + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} diff --git a/crates/nu-command/src/dataframe/values/nu_when/mod.rs b/crates/nu-command/src/dataframe/values/nu_when/mod.rs new file mode 100644 index 0000000000..883eb25218 --- /dev/null +++ b/crates/nu-command/src/dataframe/values/nu_when/mod.rs @@ -0,0 +1,79 @@ +mod custom_value; + +use core::fmt; +use nu_protocol::{ShellError, Span, Value}; +use polars::prelude::{col, when, WhenThen, WhenThenThen}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +#[derive(Clone)] +pub enum NuWhen { + WhenThen(WhenThen), + WhenThenThen(WhenThenThen), +} + +// Mocked serialization of the LazyFrame object +impl Serialize for NuWhen { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_none() + } +} + +// Mocked deserialization of the LazyFrame object +impl<'de> Deserialize<'de> for NuWhen { + fn deserialize(_deserializer: D) -> Result + where + D: Deserializer<'de>, + { + Ok(NuWhen::WhenThen(when(col("a")).then(col("b")))) + } +} + +impl fmt::Debug for NuWhen { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "NuWhen") + } +} + +impl From for NuWhen { + fn from(when_then: WhenThen) -> Self { + NuWhen::WhenThen(when_then) + } +} + +impl From for NuWhen { + fn from(when_then_then: WhenThenThen) -> Self { + NuWhen::WhenThenThen(when_then_then) + } +} + +impl NuWhen { + pub fn into_value(self, span: Span) -> Value { + Value::CustomValue { + val: Box::new(self), + span, + } + } + + pub fn try_from_value(value: Value) -> Result { + match value { + Value::CustomValue { val, span } => match val.as_any().downcast_ref::() { + Some(expr) => Ok(expr.clone()), + None => Err(ShellError::CantConvert( + "when expression".into(), + "non when expression".into(), + span, + None, + )), + }, + x => Err(ShellError::CantConvert( + "when expression".into(), + x.get_type().to_string(), + x.span()?, + None, + )), + } + } +}