diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs index 8a96751b84..d9431bfbcc 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs @@ -148,6 +148,18 @@ impl Command for ToDataFrame { .into_value(Span::test_data()), ), }, + Example { + description: "Convert to a dataframe and provide a schema that adds a new column", + example: r#"[[a b]; [1 "foo"] [2 "bar"]] | dfr into-df -s {a: u8, b:str, c:i64} | dfr fill-null 3"#, + result: Some(NuDataFrame::try_from_series(vec![ + Series::new("a", [1u8, 2]), + Series::new("b", ["foo", "bar"]), + Series::new("c", [3i64, 3]), + ], Span::test_data()) + .expect("simple df for test should not fail") + .into_value(Span::test_data()), + ), + } ] } @@ -163,8 +175,12 @@ impl Command for ToDataFrame { .map(|schema| NuSchema::try_from(&schema)) .transpose()?; - NuDataFrame::try_from_iter(input.into_iter(), maybe_schema) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) + let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema.clone())?; + + Ok(PipelineData::Value( + NuDataFrame::into_value(df, call.head), + None, + )) } } diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs index 331992d371..cbbc4e8589 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs @@ -20,7 +20,7 @@ use crate::dataframe::lazy::aggregate::LazyAggregate; pub use crate::dataframe::lazy::collect::LazyCollect; use crate::dataframe::lazy::fetch::LazyFetch; use crate::dataframe::lazy::fill_nan::LazyFillNA; -use crate::dataframe::lazy::fill_null::LazyFillNull; +pub use crate::dataframe::lazy::fill_null::LazyFillNull; use crate::dataframe::lazy::filter::LazyFilter; use crate::dataframe::lazy::groupby::ToLazyGroupBy; use crate::dataframe::lazy::join::LazyJoin; diff --git a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs index 904beaf313..ce9f049721 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs @@ -7,6 +7,7 @@ use nu_protocol::{ use super::eager::{SchemaDF, ToDataFrame}; use super::expressions::ExprCol; +use super::lazy::LazyFillNull; use super::lazy::{LazyCollect, ToLazyFrame}; use nu_cmd_lang::Let; @@ -37,6 +38,7 @@ pub fn build_test_engine_state(cmds: Vec>) -> Box, span: Span) -> Result { @@ -200,7 +202,8 @@ impl NuDataFrame { } } - conversion::from_parsed_columns(column_values) + let df = conversion::from_parsed_columns(column_values)?; + add_missing_columns(df, &maybe_schema, Span::unknown()) } pub fn fill_list_nan(list: Vec, list_span: Span, fill: Value) -> Value { @@ -510,3 +513,44 @@ impl NuDataFrame { NuSchema::new(self.df.schema()) } } + +fn add_missing_columns( + df: NuDataFrame, + maybe_schema: &Option, + span: Span, +) -> Result { + // If there are fields that are in the schema, but not in the dataframe + // add them to the dataframe. + if let Some(schema) = maybe_schema { + let fields = df.df.fields(); + let df_field_names: HashSet<&str> = fields.iter().map(|f| f.name().as_str()).collect(); + + let missing: Vec<(&str, &DataType)> = schema + .schema + .iter() + .filter_map(|(name, dtype)| { + let name = name.as_str(); + if !df_field_names.contains(name) { + Some((name, dtype)) + } else { + None + } + }) + .collect(); + + let missing_exprs: Vec = missing + .iter() + .map(|(name, dtype)| lit(Null {}).cast((*dtype).to_owned()).alias(name)) + .collect(); + + let df = if !missing.is_empty() { + let with_columns = df.lazy().with_columns(missing_exprs); + NuLazyFrame::new(true, with_columns).collect(span)? + } else { + df + }; + Ok(df) + } else { + Ok(df) + } +}