From 5f818eaefeb5c5481245a1b28475bfa49bfab751 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Mon, 15 Apr 2024 16:29:42 -0700 Subject: [PATCH] Ensure that lazy frames converted via to-lazy are not converted back to eager frames later in the pipeline. (#12525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description @maxim-uvarov discovered the following error: ``` > [[a b]; [6 2] [1 4] [4 1]] | polars into-lazy | polars sort-by a | polars unique --subset [a] Error: × Error using as series ╭─[entry #1:1:68] 1 │ [[a b]; [6 2] [1 4] [4 1]] | polars into-lazy | polars sort-by a | polars unique --subset [a] · ──────┬────── · ╰── dataframe has more than one column ╰──── ``` During investigation, I discovered that the root cause was the lazy frame being incorrectly converted back to an eager dataframe. To keep this from happening, `polars into-lazy` now explicitly marks the resulting lazy frame as not originating from an eager frame (`from_eager = false`). This prevents the conversion logic from converting it back to an eager dataframe later in the pipeline. 
--------- Co-authored-by: Jack Wright --- .../src/dataframe/lazy/to_lazy.rs | 25 ++++++++++++++++++- .../src/dataframe/values/nu_lazyframe/mod.rs | 2 +- crates/nu_plugin_polars/src/lib.rs | 16 ++++++++---- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs b/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs index 5d279ee38a..2437699b54 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/to_lazy.rs @@ -52,10 +52,33 @@ impl PluginCommand for ToLazyFrame { .transpose()?; let df = NuDataFrame::try_from_iter(plugin, input.into_iter(), maybe_schema)?; - let lazy = NuLazyFrame::from_dataframe(df); + let mut lazy = NuLazyFrame::from_dataframe(df); + // We don't want this converted back to an eager dataframe at some point + lazy.from_eager = false; Ok(PipelineData::Value( lazy.cache(plugin, engine, call.head)?.into_value(call.head), None, )) } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use nu_plugin_test_support::PluginTest; + use nu_protocol::{ShellError, Span}; + + use super::*; + + #[test] + fn test_to_lazy() -> Result<(), ShellError> { + let plugin: Arc = PolarsPlugin::new_test_mode().into(); + let mut plugin_test = PluginTest::new("polars", Arc::clone(&plugin))?; + let pipeline_data = plugin_test.eval("[[a b]; [6 2] [1 4] [4 1]] | polars into-lazy")?; + let value = pipeline_data.into_value(Span::test_data()); + let df = NuLazyFrame::try_from_value(&plugin, &value)?; + assert!(!df.from_eager); + Ok(()) + } +} diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs index 75f4791032..bb2ae67130 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs @@ -64,7 +64,7 @@ impl NuLazyFrame { help: None, inner: vec![], }) - .map(|df| 
NuDataFrame::new(!self.from_eager, df)) + .map(|df| NuDataFrame::new(false, df)) } pub fn apply_with_expr(self, expr: NuExpression, f: F) -> Self diff --git a/crates/nu_plugin_polars/src/lib.rs b/crates/nu_plugin_polars/src/lib.rs index e95372c54a..2691afb010 100644 --- a/crates/nu_plugin_polars/src/lib.rs +++ b/crates/nu_plugin_polars/src/lib.rs @@ -182,12 +182,18 @@ pub mod test { use nu_plugin_test_support::PluginTest; use nu_protocol::{ShellError, Span}; - pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> { - let plugin = PolarsPlugin { - disable_cache_drop: true, - ..PolarsPlugin::default() - }; + impl PolarsPlugin { + /// Creates a new polars plugin in test mode + pub fn new_test_mode() -> Self { + PolarsPlugin { + disable_cache_drop: true, + ..PolarsPlugin::default() + } + } + } + pub fn test_polars_plugin_command(command: &impl PluginCommand) -> Result<(), ShellError> { + let plugin = PolarsPlugin::new_test_mode(); let examples = command.examples(); // we need to cache values in the examples