diff --git a/crates/nu-command/src/dataframe/lazy/fill_na.rs b/crates/nu-command/src/dataframe/lazy/fill_na.rs deleted file mode 100644 index d55505c867..0000000000 --- a/crates/nu-command/src/dataframe/lazy/fill_na.rs +++ /dev/null @@ -1,68 +0,0 @@ -use crate::dataframe::values::{NuExpression, NuLazyFrame}; -use nu_engine::CallExt; -use nu_protocol::{ - ast::Call, - engine::{Command, EngineState, Stack}, - Category, Example, PipelineData, ShellError, Signature, SyntaxShape, Type, Value, -}; - -#[derive(Clone)] -pub struct LazyFillNA; - -impl Command for LazyFillNA { - fn name(&self) -> &str { - "fill-na" - } - - fn usage(&self) -> &str { - "Replaces NA values with the given expression" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "fill", - SyntaxShape::Any, - "Expression to use to fill the NAN values", - ) - .input_type(Type::Custom("dataframe".into())) - .output_type(Type::Custom("dataframe".into())) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "", - example: "", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let fill: Value = call.req(engine_state, stack, 0)?; - let value = input.into_value(call.head); - - if NuExpression::can_downcast(&value) { - let expr = NuExpression::try_from_value(value)?; - let fill = NuExpression::try_from_value(fill)?.into_polars(); - let expr: NuExpression = expr.into_polars().fill_nan(fill).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } else { - let lazy = NuLazyFrame::try_from_value(value)?; - let expr = NuExpression::try_from_value(fill)?.into_polars(); - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().fill_nan(expr)); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } - } -} diff --git a/crates/nu-command/src/dataframe/lazy/fill_nan.rs b/crates/nu-command/src/dataframe/lazy/fill_nan.rs new file mode 100644 index 0000000000..55f3b12d5c --- /dev/null +++ b/crates/nu-command/src/dataframe/lazy/fill_nan.rs @@ -0,0 +1,137 @@ +use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; +use nu_engine::CallExt; +use nu_protocol::{ + ast::Call, + engine::{Command, EngineState, Stack}, + Category, Example, PipelineData, ShellError, Signature, Span, SyntaxShape, Type, Value, +}; + +#[derive(Clone)] +pub struct LazyFillNA; + +impl Command for LazyFillNA { + fn name(&self) -> &str { + "fill-nan" + } + + fn usage(&self) -> &str { + "Replaces NaN values with the given expression" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .required( + "fill", + SyntaxShape::Any, + "Expression to use to fill the NAN values", + ) + .input_type(Type::Custom("dataframe".into())) + .output_type(Type::Custom("dataframe".into())) + .category(Category::Custom("lazyframe".into())) + } + + fn examples(&self) -> Vec { + vec![ + Example { + description: "Fills the NaN values with 0", + example: "[1 2 NaN 3 NaN] | into df | fill-nan 0", + result: Some( + NuDataFrame::try_from_columns(vec![Column::new( + "0".to_string(), + vec![ + Value::test_int(1), + Value::test_int(2), + Value::test_int(0), + Value::test_int(3), + Value::test_int(0), + ], + )]) + .expect("Df for test should not fail") + .into_value(Span::test_data()), + ), + }, + Example { + description: "Fills the NaN values of a whole dataframe", + example: "[[a b]; [0.2 1] [0.1 NaN]] | into df | fill-nan 0", + result: Some( + NuDataFrame::try_from_columns(vec![ + Column::new( + "a".to_string(), + vec![Value::test_float(0.2), Value::test_float(0.1)], + ), + Column::new( + "b".to_string(), + vec![Value::test_int(1), Value::test_int(0)], + ), + ]) + .expect("Df for test should not fail") + .into_value(Span::test_data()), + ), + }, + ] + } + + fn run( + &self, + engine_state: &EngineState, + stack: &mut Stack, + call: &Call, + input: PipelineData, + ) -> Result { + let fill: Value = call.req(engine_state, stack, 0)?; + let value = input.into_value(call.head); + + if NuExpression::can_downcast(&value) { + let expr = NuExpression::try_from_value(value)?; + let fill = NuExpression::try_from_value(fill)?.into_polars(); + let expr: NuExpression = expr.into_polars().fill_nan(fill).into(); + + Ok(PipelineData::Value( + NuExpression::into_value(expr, call.head), + None, + )) + } else { + let val_span = value.span()?; + let frame = NuDataFrame::try_from_value(value)?; + let columns = frame.columns(val_span)?; + let dataframe = columns + .into_iter() + .map(|column| { + let column_name = column.name().to_string(); + let values = column + .into_iter() + .map(|value| match value { + Value::Float { val, .. } => { + if val.is_nan() { + fill.clone() + } else { + value + } + } + Value::List { vals, span } => { + NuDataFrame::fill_list_nan(vals, span, fill.clone()) + } + _ => value, + }) + .collect::>(); + Column::new(column_name, values) + }) + .collect::>(); + Ok(PipelineData::Value( + NuDataFrame::try_from_columns(dataframe)?.into_value(call.head), + None, + )) + } + } +} + +#[cfg(test)] +mod test { + use super::super::super::test_dataframe::test_dataframe; + use super::*; + + #[test] + fn test_examples() { + test_dataframe(vec![Box::new(LazyFillNA {})]) + } +} diff --git a/crates/nu-command/src/dataframe/lazy/mod.rs b/crates/nu-command/src/dataframe/lazy/mod.rs index 2c45861d09..713c53b0ba 100644 --- a/crates/nu-command/src/dataframe/lazy/mod.rs +++ b/crates/nu-command/src/dataframe/lazy/mod.rs @@ -1,7 +1,7 @@ pub mod aggregate; mod collect; mod fetch; -mod fill_na; +mod fill_nan; mod fill_null; mod filter; pub mod groupby; @@ -17,7 +17,7 @@ use nu_protocol::engine::StateWorkingSet; use crate::dataframe::lazy::aggregate::LazyAggregate; pub use crate::dataframe::lazy::collect::LazyCollect; use crate::dataframe::lazy::fetch::LazyFetch; -use crate::dataframe::lazy::fill_na::LazyFillNA; +use crate::dataframe::lazy::fill_nan::LazyFillNA; use crate::dataframe::lazy::fill_null::LazyFillNull; use crate::dataframe::lazy::filter::LazyFilter; use crate::dataframe::lazy::groupby::ToLazyGroupBy; diff --git a/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs index 2f945d61f1..4a39f95f6f 100644 --- a/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs +++ b/crates/nu-command/src/dataframe/values/nu_dataframe/mod.rs @@ -204,6 +204,33 @@ impl NuDataFrame { conversion::from_parsed_columns(column_values) } + pub fn fill_list_nan(list: Vec, list_span: Span, fill: Value) -> Value { + let newlist = list + .into_iter() + .map(|value| match value { + Value::Float { val, .. } => { + if val.is_nan() { + fill.clone() + } else { + value + } + } + Value::List { vals, span } => Self::fill_list_nan(vals, span, fill.clone()), + _ => value, + }) + .collect::>(); + Value::list(newlist, list_span) + } + + pub fn columns(&self, span: Span) -> Result, ShellError> { + let height = self.df.height(); + self.df + .get_columns() + .iter() + .map(|col| conversion::create_column(col, 0, height, span)) + .collect::, ShellError>>() + } + pub fn try_from_value(value: Value) -> Result { if Self::can_downcast(&value) { Ok(Self::get_df(value)?)