diff --git a/Cargo.lock b/Cargo.lock index 23e18657db..7e34334db4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -478,17 +478,6 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ada7f35ca622a86a4d6c27be2633fc6c243ecc834859628fcce0681d8e76e1c8" -[[package]] -name = "brotli" -version = "3.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor 2.5.1", -] - [[package]] name = "brotli" version = "5.0.0" @@ -497,17 +486,7 @@ checksum = "19483b140a7ac7174d34b5a581b406c64f84da5409d3e09cf4fff604f9270e67" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", - "brotli-decompressor 4.0.0", -] - -[[package]] -name = "brotli-decompressor" -version = "2.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "brotli-decompressor", ] [[package]] @@ -871,7 +850,7 @@ checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" dependencies = [ "crossterm", "strum", - "strum_macros 0.26.2", + "strum_macros", "unicode-width", ] @@ -1295,6 +1274,9 @@ name = "either" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" +dependencies = [ + "serde", +] [[package]] name = "eml-parser" @@ -1794,6 +1776,7 @@ dependencies = [ "ahash 0.8.11", "allocator-api2", "rayon", + "serde", ] [[package]] @@ -2935,7 +2918,7 @@ dependencies = [ "alphanumeric-sort", "base64 0.22.1", "bracoxide", - "brotli 5.0.0", + "brotli", "byteorder", "bytesize", "calamine", @@ -3222,7 +3205,7 @@ dependencies = [ name = "nu-protocol" version = "0.94.3" dependencies = [ - "brotli 5.0.0", + "brotli", "byte-unit", "chrono", "chrono-humanize", @@ -3243,7 +3226,7 @@ dependencies = [ "serde", "serde_json", "strum", - "strum_macros 0.26.2", + "strum_macros", "tempfile", "thiserror", "typetag", @@ -3404,7 +3387,7 @@ dependencies = [ "polars-plan", "polars-utils", "serde", - "sqlparser 0.45.0", + "sqlparser 0.47.0", "tempfile", "typetag", "uuid", @@ -4014,9 +3997,9 @@ dependencies = [ [[package]] name = "polars" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea21b858b16b9c0e17a12db2800d11aa5b4bd182be6b3022eb537bbfc1f2db5" +checksum = "e148396dca5496566880fa19374f3f789a29db94e3eb458afac1497b4bac5442" dependencies = [ "getrandom", "polars-arrow", @@ -4034,9 +4017,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "725b09f2b5ef31279b66e27bbab63c58d49d8f6696b66b1f46c7eaab95e80f75" +checksum = "1cb5e11cd0752ae022fa6ca3afa50a14b0301b7ce53c0135828fbb0f4fa8303e" dependencies = [ "ahash 0.8.11", "atoi", @@ -4082,9 +4065,9 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a796945b14b14fbb79b91ef0406e6fddca2be636e889f81ea5d6ee7d36efb4fe" +checksum = "89fc4578f826234cdecb782952aa9c479dc49373f81694a7b439c70b6f609ba0" dependencies = [ "bytemuck", "either", @@ -4098,9 +4081,9 @@ dependencies = [ [[package]] name = "polars-core" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465f70d3e96b6d0b1a43c358ba451286b8c8bd56696feff020d65702aa33e35c" +checksum = "e490c6bace1366a558feea33d1846f749a8ca90bd72a6748752bc65bb4710b2a" dependencies = [ "ahash 0.8.11", "bitflags 2.5.0", @@ -4132,9 +4115,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5224d5d05e6b8a6f78b75951ae1b5f82c8ab1979e11ffaf5fd41941e3d5b0757" +checksum = "08888f58e61599b00f5ea0c2ccdc796b54b9859559cc0d4582733509451fa01a" dependencies = [ "avro-schema", "polars-arrow-format", @@ -4144,10 +4127,30 @@ dependencies = [ ] [[package]] -name = "polars-io" -version = "0.39.2" +name = "polars-expr" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2c8589e418cbe4a48228d64b2a8a40284a82ec3c98817c0c2bcc0267701338b" +checksum = "4173591920fe56ad55af025f92eb0d08421ca85705c326a640c43856094e3484" +dependencies = [ + "ahash 0.8.11", + "bitflags 2.5.0", + "once_cell", + "polars-arrow", + "polars-core", + "polars-io", + "polars-ops", + "polars-plan", + "polars-time", + "polars-utils", + "rayon", + "smartstring", +] + +[[package]] +name = "polars-io" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5842896aea46d975b425d63f156f412aed3cfde4c257b64fb1f43ceea288074e" dependencies = [ "ahash 0.8.11", "async-trait", @@ -4186,9 +4189,9 @@ dependencies = [ [[package]] name = "polars-json" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81224492a649a12b668480c0cf219d703f432509765d2717e72fe32ad16fc701" +checksum = "160cbad0145b93ac6a88639aadfa6f7d7c769d05a8674f9b7e895b398cae9901" dependencies = [ "ahash 0.8.11", "chrono", @@ -4207,9 +4210,9 @@ dependencies = [ [[package]] name = "polars-lazy" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2632b1af668e2058d5f8f916d8fbde3cac63d03ae29a705f598e41dcfeb7f" +checksum = "e805ea2ebbc6b7749b0afb31b7fc5d32b42b57ba29b984549d43d3a16114c4a5" dependencies = [ "ahash 0.8.11", "bitflags 2.5.0", @@ -4217,6 +4220,7 @@ dependencies = [ "once_cell", "polars-arrow", "polars-core", + "polars-expr", "polars-io", "polars-json", "polars-ops", @@ -4231,13 +4235,13 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efdbdb4d9a92109bc2e0ce8e17af5ae8ab643bb5b7ee9d1d74f0aeffd1fbc95f" +checksum = "7b0aed7e169c81b98457641cf82b251f52239a668916c2e683abd1f38df00d58" dependencies = [ "ahash 0.8.11", "argminmax", - "base64 0.21.7", + "base64 0.22.1", "bytemuck", "chrono", "chrono-tz 0.8.6", @@ -4267,14 +4271,14 @@ dependencies = [ [[package]] name = "polars-parquet" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b421d2196f786fdfe162db614c8485f8308fe41575d4de634a39bbe460d1eb6a" +checksum = "c70670a9e51cac66d0e77fd20b5cc957dbcf9f2660d410633862bb72f846d5b8" dependencies = [ "ahash 0.8.11", "async-stream", - "base64 0.21.7", - "brotli 3.5.0", + "base64 0.22.1", + "brotli", "ethnum", "flate2", "futures", @@ -4293,9 +4297,9 @@ dependencies = [ [[package]] name = "polars-pipe" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48700f1d5bd56a15451e581f465c09541492750360f18637b196f995470a015c" +checksum = "0a40ae1b3c74ee07e2d1f7cbf56c5d6e15969e45d9b6f0903bd2acaf783ba436" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -4305,6 +4309,7 @@ dependencies = [ "polars-arrow", "polars-compute", "polars-core", + "polars-expr", "polars-io", "polars-ops", "polars-plan", @@ -4318,13 +4323,14 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fb8e2302e20c44defd5be8cad9c96e75face63c3a5f609aced8c4ec3b3ac97d" +checksum = "8daa3541ae7e9af311a4389bc2b21f83349c34c723cc67fa524cdefdaa172d90" dependencies = [ "ahash 0.8.11", "bytemuck", "chrono-tz 0.8.6", + "either", "hashbrown 0.14.5", "once_cell", "percent-encoding", @@ -4341,15 +4347,15 @@ dependencies = [ "regex", "serde", "smartstring", - "strum_macros 0.25.3", + "strum_macros", "version_check", ] [[package]] name = "polars-row" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a515bdc68c2ae3702e3de70d89601f3b71ca8137e282a226dddb53ee4bacfa2e" +checksum = "deb285f2f3a65b00dd06bef16bb9f712dbb5478f941dab5cf74f9f016d382e40" dependencies = [ "bytemuck", "polars-arrow", @@ -4359,11 +4365,12 @@ dependencies = [ [[package]] name = "polars-sql" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4bb7cc1c04c3023d1953b2f1dec50515e8fd8169a5a2bf4967b3b082232db7" +checksum = "a724f699d194cb02c25124d3832f7d4d77f387f1a89ee42f6b9e88ec561d4ad9" dependencies = [ "hex", + "once_cell", "polars-arrow", "polars-core", "polars-error", @@ -4377,11 +4384,12 @@ dependencies = [ [[package]] name = "polars-time" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efc18e3ad92eec55db89d88f16c22d436559ba7030cf76f86f6ed7a754b673f1" +checksum = "87ebec238d8b6200d9f0c3ce411c8441e950bd5a7df7806b8172d06c1d5a4b97" dependencies = [ "atoi", + "bytemuck", "chrono", "chrono-tz 0.8.6", "now", @@ -4398,9 +4406,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.39.2" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c760b6c698cfe2fbbbd93d6cfb408db14ececfe1d92445dae2229ce1b5b21ae8" +checksum = "34e1a907c63abf71e5f21467e2e4ff748896c28196746f631c6c25512ec6102c" dependencies = [ "ahash 0.8.11", "bytemuck", @@ -4834,7 +4842,7 @@ dependencies = [ "serde_json", "strip-ansi-escapes", "strum", - "strum_macros 0.26.2", + "strum_macros", "thiserror", "unicode-segmentation", "unicode-width", @@ -5562,9 +5570,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.45.0" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" +checksum = "295e9930cd7a97e58ca2a070541a3ca502b17f5d1fa7157376d0fabd85324f25" dependencies = [ "log", ] @@ -5678,20 +5686,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ - "strum_macros 0.26.2", -] - -[[package]] -name = "strum_macros" -version = "0.25.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.60", + "strum_macros", ] [[package]] diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml index a2c693128d..98288f3ebf 100644 --- a/crates/nu_plugin_polars/Cargo.toml +++ b/crates/nu_plugin_polars/Cargo.toml @@ -29,12 +29,12 @@ indexmap = { version = "2.2" } mimalloc = { version = "0.1.42" } num = {version = "0.4"} serde = { version = "1.0", features = ["derive"] } -sqlparser = { version = "0.45"} -polars-io = { version = "0.39", features = ["avro"]} -polars-arrow = { version = "0.39"} -polars-ops = { version = "0.39"} -polars-plan = { version = "0.39", features = ["regex"]} -polars-utils = { version = "0.39"} +sqlparser = { version = "0.47"} +polars-io = { version = "0.40", features = ["avro"]} +polars-arrow = { version = "0.40"} +polars-ops = { version = "0.40"} +polars-plan = { version = "0.40", features = ["regex"]} +polars-utils = { version = "0.40"} typetag = "0.2" uuid = { version = "1.7", features = ["v4", "serde"] } @@ -70,7 +70,7 @@ features = [ "to_dummies", ] optional = false -version = "0.39" +version = "0.40" [dev-dependencies] nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.94.3" } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/open.rs b/crates/nu_plugin_polars/src/dataframe/eager/open.rs index 81ea537574..13a65074f0 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/open.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/open.rs @@ -16,14 +16,17 @@ use std::{ fs::File, io::BufReader, path::{Path, PathBuf}, + sync::Arc, }; use polars::prelude::{ - CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, - LazyFrame, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, + CsvEncoding, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, LazyFrame, + ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, }; -use polars_io::{avro::AvroReader, prelude::ParallelStrategy, HiveOptions}; +use polars_io::{ + avro::AvroReader, csv::read::CsvReadOptions, prelude::ParallelStrategy, HiveOptions, +}; #[derive(Clone)] pub struct OpenDataFrame; @@ -175,6 +178,7 @@ fn from_parquet( cloud_options: None, use_statistics: false, hive_options: HiveOptions::default(), + glob: true, }; let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) @@ -445,7 +449,7 @@ fn from_csv( } }; - let csv_reader = csv_reader.has_header(!no_header); + let csv_reader = csv_reader.with_has_header(!no_header); let csv_reader = match maybe_schema { Some(schema) => csv_reader.with_schema(Some(schema.into())), @@ -475,7 +479,23 @@ fn from_csv( df.cache_and_to_value(plugin, engine, call.head) } else { - let csv_reader = CsvReader::from_path(file_path) + let df = CsvReadOptions::default() + .with_has_header(!no_header) + .with_infer_schema_length(infer_schema) + .with_skip_rows(skip_rows.unwrap_or_default()) + .with_schema(maybe_schema.map(|s| s.into())) + .with_columns(columns.map(Arc::new)) + .map_parse_options(|options| { + options + .with_separator( + delimiter + .as_ref() + .and_then(|d| d.item.chars().next().map(|c| c as u8)) + .unwrap_or(b','), + ) + .with_encoding(CsvEncoding::LossyUtf8) + }) + .try_into_reader_with_file_path(Some(file_path.to_path_buf())) .map_err(|e| ShellError::GenericError { error: "Error creating CSV reader".into(), msg: e.to_string(), @@ -483,52 +503,6 @@ fn from_csv( help: None, inner: vec![], })? - .with_encoding(CsvEncoding::LossyUtf8); - - let csv_reader = match delimiter { - None => csv_reader, - Some(d) => { - if d.item.len() != 1 { - return Err(ShellError::GenericError { - error: "Incorrect delimiter".into(), - msg: "Delimiter has to be one character".into(), - span: Some(d.span), - help: None, - inner: vec![], - }); - } else { - let delimiter = match d.item.chars().next() { - Some(d) => d as u8, - None => unreachable!(), - }; - csv_reader.with_separator(delimiter) - } - } - }; - - let csv_reader = csv_reader.has_header(!no_header); - - let csv_reader = match maybe_schema { - Some(schema) => csv_reader.with_schema(Some(schema.into())), - None => csv_reader, - }; - - let csv_reader = match infer_schema { - None => csv_reader, - Some(r) => csv_reader.infer_schema(Some(r)), - }; - - let csv_reader = match skip_rows { - None => csv_reader, - Some(r) => csv_reader.with_skip_rows(r), - }; - - let csv_reader = match columns { - None => csv_reader, - Some(columns) => csv_reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = csv_reader .finish() .map_err(|e| ShellError::GenericError { error: "CSV reader error".into(), @@ -536,9 +510,8 @@ fn from_csv( span: Some(call.head), help: None, inner: vec![], - })? - .into(); - + })?; + let df = NuDataFrame::new(false, df); df.cache_and_to_value(plugin, engine, call.head) } } diff --git a/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs b/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs index 9c0728ea5f..163e974a1a 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/sql_expr.rs @@ -3,7 +3,8 @@ use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Re use sqlparser::ast::{ ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType, - Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType, + DuplicateTreatment, Expr as SqlExpr, Function as SQLFunction, FunctionArguments, + Value as SqlValue, WindowType, }; fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result { @@ -33,7 +34,7 @@ fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result { SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds), SQLDataType::Array(array_type_def) => match array_type_def { ArrayElemTypeDef::AngleBracket(inner_type) - | ArrayElemTypeDef::SquareBracket(inner_type) => { + | ArrayElemTypeDef::SquareBracket(inner_type, _) => { DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)) } _ => { @@ -120,9 +121,7 @@ pub fn parse_sql_expr(expr: &SqlExpr) -> Result { } SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?, SqlExpr::Cast { - expr, - data_type, - format: _, + expr, data_type, .. } => cast_(parse_sql_expr(expr)?, data_type)?, SqlExpr::Nested(expr) => parse_sql_expr(expr)?, SqlExpr::Value(value) => literal_expr(value)?, @@ -162,8 +161,17 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result { use sqlparser::ast::{FunctionArg, FunctionArgExpr}; // Function name mostly do not have name space, so it mostly take the first args let function_name = sql_function.name.0[0].value.to_ascii_lowercase(); - let args = sql_function - .args + + // One day this should support the additional argument types supported with 0.40 + let (args, distinct) = match &sql_function.args { + FunctionArguments::List(list) => ( + list.args.clone(), + list.duplicate_treatment == Some(DuplicateTreatment::Distinct), + ), + _ => (vec![], false), + }; + + let args = args .iter() .map(|arg| match arg { FunctionArg::Named { arg, .. } => arg, @@ -174,15 +182,15 @@ fn parse_sql_function(sql_function: &SQLFunction) -> Result { match ( function_name.as_str(), args.as_slice(), - sql_function.distinct, + distinct, ) { - ("sum", [FunctionArgExpr::Expr(expr)], false) => { + ("sum", [FunctionArgExpr::Expr(ref expr)], false) => { apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum() } - ("count", [FunctionArgExpr::Expr(expr)], false) => { + ("count", [FunctionArgExpr::Expr(ref expr)], false) => { apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count() } - ("count", [FunctionArgExpr::Expr(expr)], true) => { + ("count", [FunctionArgExpr::Expr(ref expr)], true) => { apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique() } // Special case for wildcard args to count function. diff --git a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs index 5e377210b9..74e96fc7be 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs @@ -189,53 +189,19 @@ fn command( .map(|col| { let count = col.len() as f64; - let sum = col.sum_as_series().ok().and_then(|series| { - series - .cast(&DataType::Float64) - .ok() - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }); - - let mean = match col.mean_as_series().get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }; - - let median = match col.median_as_series() { - Ok(v) => match v.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }, - _ => None, - }; - - let std = match col.std_as_series(0) { - Ok(v) => match v.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }, - _ => None, - }; - - let min = col.min_as_series().ok().and_then(|series| { - series - .cast(&DataType::Float64) - .ok() - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }); + let sum = col.sum::().ok(); + let mean = col.mean(); + let median = col.median(); + let std = col.std(0); + let min = col.min::().ok().flatten(); let mut quantiles = quantiles .clone() .into_iter() .map(|q| { - col.quantile_as_series(q, QuantileInterpolOptions::default()) + col.quantile_reduce(q, QuantileInterpolOptions::default()) .ok() + .map(|s| s.into_series("quantile")) .and_then(|ca| ca.cast(&DataType::Float64).ok()) .and_then(|ca| match ca.get(0) { Ok(AnyValue::Float64(v)) => Some(v), @@ -244,15 +210,7 @@ fn command( }) .collect::>>(); - let max = col.max_as_series().ok().and_then(|series| { - series - .cast(&DataType::Float64) - .ok() - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }); + let max = col.max::().ok().flatten(); let mut descriptors = vec![Some(count), sum, mean, median, std, min]; descriptors.append(&mut quantiles); diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs index 5b1f9f1a73..58b60f3427 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs @@ -5,9 +5,7 @@ use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; use crate::values::CustomValueSupport; use crate::PolarsPlugin; use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; -use nu_protocol::{ - Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Type, Value, -}; +use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Span, Type, Value}; // The structs defined in this file are structs that form part of other commands // since they share a similar name @@ -60,6 +58,7 @@ macro_rules! expr_command { mod $test { use super::*; use crate::test::test_polars_plugin_command; + use nu_protocol::ShellError; #[test] fn test_examples() -> Result<(), ShellError> { @@ -163,19 +162,7 @@ macro_rules! lazy_expr_command { if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) .map_err(LabeledError::from)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.to_polars() - .$func() - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - }) - .map_err(LabeledError::from)?, - ); + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func()); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } else { @@ -192,6 +179,7 @@ macro_rules! lazy_expr_command { mod $test { use super::*; use crate::test::test_polars_plugin_command; + use nu_protocol::ShellError; #[test] fn test_examples() -> Result<(), ShellError> { @@ -244,19 +232,7 @@ macro_rules! lazy_expr_command { if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) .map_err(LabeledError::from)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.to_polars() - .$func($ddof) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - }) - .map_err(LabeledError::from)?, - ); + let lazy = NuLazyFrame::new(lazy.from_eager, lazy.to_polars().$func($ddof)); lazy.to_pipeline_data(plugin, engine, call.head) .map_err(LabeledError::from) } else { @@ -272,6 +248,7 @@ macro_rules! lazy_expr_command { mod $test { use super::*; use crate::test::test_polars_plugin_command; + use nu_protocol::ShellError; #[test] fn test_examples() -> Result<(), ShellError> { diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs b/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs index 31f0f76cbb..1238717b2d 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/lit.rs @@ -35,7 +35,7 @@ impl PluginCommand for ExprLit { example: "polars lit 2 | polars into-nu", result: Some(Value::test_record(record! { "expr" => Value::test_string("literal"), - "value" => Value::test_string("2"), + "value" => Value::test_string("dyn int: 2"), })), }] } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs b/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs index 8fa717954f..d2fd92ec87 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/aggregate.rs @@ -195,6 +195,7 @@ fn get_col_name(expr: &Expr) -> Option { | Expr::Len | Expr::Nth(_) | Expr::SubPlan(_, _) + | Expr::IndexColumn(_) | Expr::Selector(_) => None, } } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs index d6d2ef69ba..67f5aee9ba 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs @@ -189,7 +189,7 @@ impl PluginCommand for LazyJoin { let how = if left { JoinType::Left } else if outer { - JoinType::Outer { coalesce: true } + JoinType::Outer } else if cross { JoinType::Cross } else { diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs index 6e42856723..8106218c26 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs @@ -116,16 +116,7 @@ fn command( call: &EvaluatedCall, lazy: NuLazyFrame, ) -> Result { - let polars_lazy = lazy - .to_polars() - .median() - .map_err(|e| ShellError::GenericError { - error: format!("Error in median operation: {e}"), - msg: "".into(), - help: None, - span: None, - inner: vec![], - })?; + let polars_lazy = lazy.to_polars().median(); let lazy = NuLazyFrame::new(lazy.from_eager, polars_lazy); lazy.to_pipeline_data(plugin, engine, call.head) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs b/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs index 517fead989..e63fae6610 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs @@ -134,14 +134,7 @@ fn command( let lazy = NuLazyFrame::new( lazy.from_eager, lazy.to_polars() - .quantile(lit(quantile), QuantileInterpolOptions::default()) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - })?, + .quantile(lit(quantile), QuantileInterpolOptions::default()), ); lazy.to_pipeline_data(plugin, engine, call.head) diff --git a/crates/nu_plugin_polars/src/dataframe/series/rolling.rs b/crates/nu_plugin_polars/src/dataframe/series/rolling.rs index 6bc8a3929a..72394d3375 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/rolling.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/rolling.rs @@ -7,7 +7,7 @@ use nu_protocol::{ Category, Example, LabeledError, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value, }; -use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime}; +use polars::prelude::{DataType, IntoSeries, RollingOptionsFixedWindow, SeriesOpsTime}; enum RollType { Min, @@ -131,7 +131,7 @@ fn command( input: PipelineData, ) -> Result { let roll_type: Spanned = call.req(0)?; - let window_size: i64 = call.req(1)?; + let window_size: usize = call.req(1)?; let df = NuDataFrame::try_from_pipeline_coerce(plugin, input, call.head)?; let series = df.as_series(call.head)?; @@ -148,17 +148,12 @@ fn command( let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?; - let rolling_opts = RollingOptionsImpl { - window_size: Duration::new(window_size), - min_periods: window_size as usize, - weights: None, - center: false, - by: None, - closed_window: None, - tu: None, - tz: None, - fn_params: None, + let rolling_opts = RollingOptionsFixedWindow { + window_size, + min_periods: window_size, + ..RollingOptionsFixedWindow::default() }; + let res = match roll_type { RollType::Max => series.rolling_max(rolling_opts), RollType::Min => series.rolling_min(rolling_opts), diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs index 96eab00d53..cead8c4a11 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_expression/mod.rs @@ -155,7 +155,10 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result { span, )), Expr::Columns(columns) => { - let value = columns.iter().map(|col| Value::string(col, span)).collect(); + let value = columns + .iter() + .map(|col| Value::string(col.to_string(), span)) + .collect(); Ok(Value::record( record! { "expr" => Value::string("columns", span), @@ -415,6 +418,12 @@ pub fn expr_to_value(expr: &Expr, span: Span) -> Result { msg_span: span, input_span: Span::unknown(), }), + Expr::IndexColumn(_) => Err(ShellError::UnsupportedInput { + msg: "Expressions of type IndexColumn to Nu Values is not yet supported".to_string(), + input: format!("Expression is {expr:?}"), + msg_span: span, + input_span: Span::unknown(), + }), } } diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs index 53e2b425b7..4dc231d706 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs @@ -160,7 +160,15 @@ impl CustomValueSupport for NuLazyFrame { .unwrap_or_else(|_| "".to_string()); Ok(Value::record( record! { - "plan" => Value::string(self.lazy.describe_plan(), span), + "plan" => Value::string( + self.lazy.describe_plan().map_err(|e| ShellError::GenericError { + error: "Error getting plan".into(), + msg: e.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?, + span), "optimized_plan" => Value::string(optimized_plan, span), }, span, diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs index f684b8bb38..5a30d40cd4 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_schema.rs @@ -1,7 +1,10 @@ use std::sync::Arc; use nu_protocol::{ShellError, Span, Value}; -use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit}; +use polars::{ + datatypes::UnknownKind, + prelude::{DataType, Field, Schema, SchemaRef, TimeUnit}, +}; #[derive(Debug, Clone)] pub struct NuSchema { @@ -104,7 +107,7 @@ pub fn str_to_dtype(dtype: &str, span: Span) -> Result { "date" => Ok(DataType::Date), "time" => Ok(DataType::Time), "null" => Ok(DataType::Null), - "unknown" => Ok(DataType::Unknown), + "unknown" => Ok(DataType::Unknown(UnknownKind::Any)), "object" => Ok(DataType::Object("unknown", None)), _ if dtype.starts_with("list") => { let dtype = dtype @@ -299,7 +302,7 @@ mod test { let dtype = "unknown"; let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Unknown; + let expected = DataType::Unknown(UnknownKind::Any); assert_eq!(schema, expected); let dtype = "object";