From 16db368232d095755045ea90ab5805ab0cb91e32 Mon Sep 17 00:00:00 2001 From: Nico Mandery Date: Tue, 16 Nov 2021 00:01:02 +0100 Subject: [PATCH] upgrade polars to 0.17 (#4122) --- crates/nu-command/Cargo.toml | 2 +- .../src/commands/dataframe/describe.rs | 12 +++--- .../nu-command/src/commands/dataframe/join.rs | 17 ++++++++- .../src/commands/dataframe/series/get_day.rs | 2 +- .../src/commands/dataframe/series/get_hour.rs | 2 +- .../commands/dataframe/series/get_minute.rs | 2 +- .../commands/dataframe/series/get_month.rs | 2 +- .../dataframe/series/get_nanosecond.rs | 2 +- .../commands/dataframe/series/get_ordinal.rs | 2 +- .../commands/dataframe/series/get_second.rs | 2 +- .../src/commands/dataframe/series/get_week.rs | 2 +- .../commands/dataframe/series/get_weekday.rs | 2 +- .../src/commands/dataframe/series/get_year.rs | 2 +- .../src/commands/dataframe/series/rolling.rs | 38 ++++++------------- .../commands/dataframe/series/set_with_idx.rs | 2 +- .../src/commands/dataframe/series/strftime.rs | 2 +- .../nu-command/src/commands/dataframe/take.rs | 2 +- .../src/commands/dataframe/to_csv.rs | 4 +- crates/nu-protocol/Cargo.toml | 4 +- .../src/dataframe/compute_between.rs | 8 ++-- .../nu-protocol/src/dataframe/conversion.rs | 28 ++++++-------- .../nu-protocol/src/dataframe/nu_dataframe.rs | 2 +- 22 files changed, 67 insertions(+), 74 deletions(-) diff --git a/crates/nu-command/Cargo.toml b/crates/nu-command/Cargo.toml index 5fd181820e..5f81285f88 100644 --- a/crates/nu-command/Cargo.toml +++ b/crates/nu-command/Cargo.toml @@ -89,7 +89,7 @@ zip = { version="0.5.9", optional=true } digest = "0.9.0" [dependencies.polars] -version = "0.16.0" +version = "0.17.0" optional = true features = ["parquet", "json", "random", "pivot", "strings", "is_in", "temporal", "cum_agg", "rolling_window"] diff --git a/crates/nu-command/src/commands/dataframe/describe.rs b/crates/nu-command/src/commands/dataframe/describe.rs index 41a6da1a93..6cd06bfbca 100644 --- 
a/crates/nu-command/src/commands/dataframe/describe.rs +++ b/crates/nu-command/src/commands/dataframe/describe.rs @@ -121,7 +121,7 @@ fn command(mut args: CommandArgs) -> Result { let tail = df.as_ref().get_columns().iter().map(|col| { let count = col.len() as f64; - let sum = match col.sum_as_series().cast_with_dtype(&DataType::Float64) { + let sum = match col.sum_as_series().cast(&DataType::Float64) { Ok(ca) => match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, @@ -144,7 +144,7 @@ fn command(mut args: CommandArgs) -> Result { _ => None, }; - let min = match col.min_as_series().cast_with_dtype(&DataType::Float64) { + let min = match col.min_as_series().cast(&DataType::Float64) { Ok(ca) => match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, @@ -153,7 +153,7 @@ fn command(mut args: CommandArgs) -> Result { }; let q_25 = match col.quantile_as_series(0.25) { - Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) { + Ok(ca) => match ca.cast(&DataType::Float64) { Ok(ca) => match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, @@ -164,7 +164,7 @@ fn command(mut args: CommandArgs) -> Result { }; let q_50 = match col.quantile_as_series(0.50) { - Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) { + Ok(ca) => match ca.cast(&DataType::Float64) { Ok(ca) => match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, @@ -175,7 +175,7 @@ fn command(mut args: CommandArgs) -> Result { }; let q_75 = match col.quantile_as_series(0.75) { - Ok(ca) => match ca.cast_with_dtype(&DataType::Float64) { + Ok(ca) => match ca.cast(&DataType::Float64) { Ok(ca) => match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, @@ -185,7 +185,7 @@ fn command(mut args: CommandArgs) -> Result { Err(_) => None, }; - let max = match col.max_as_series().cast_with_dtype(&DataType::Float64) { + let max = match col.max_as_series().cast(&DataType::Float64) { Ok(ca) => match ca.get(0) { AnyValue::Float64(v) => Some(v), _ => None, diff --git 
a/crates/nu-command/src/commands/dataframe/join.rs b/crates/nu-command/src/commands/dataframe/join.rs index 66dddda5d1..da18eb8bfd 100644 --- a/crates/nu-command/src/commands/dataframe/join.rs +++ b/crates/nu-command/src/commands/dataframe/join.rs @@ -44,6 +44,12 @@ impl WholeStreamCommand for DataFrame { "type of join. Inner by default", Some('t'), ) + .named( + "suffix", + SyntaxShape::String, + "suffix for the columns of the right dataframe", + Some('s'), + ) } fn run(&self, args: CommandArgs) -> Result { @@ -104,6 +110,7 @@ fn command(mut args: CommandArgs) -> Result { let r_df: Value = args.req(0)?; let l_col: Vec = args.req_named("left")?; let r_col: Vec = args.req_named("right")?; + let r_suffix: Option> = args.get_flag("suffix")?; let join_type_op: Option> = args.get_flag("type")?; let join_type = match join_type_op { @@ -124,6 +131,8 @@ fn command(mut args: CommandArgs) -> Result { }, }; + let suffix = r_suffix.map(|s| s.item); + let (l_col_string, l_col_span) = convert_columns(&l_col, &tag)?; let (r_col_string, r_col_span) = convert_columns(&r_col, &tag)?; @@ -142,7 +151,13 @@ fn command(mut args: CommandArgs) -> Result { )?; df.as_ref() - .join(r_df.as_ref(), &l_col_string, &r_col_string, join_type) + .join( + r_df.as_ref(), + &l_col_string, + &r_col_string, + join_type, + suffix, + ) .map_err(|e| parse_polars_error::<&str>(&e, &l_col_span, None)) } _ => Err(ShellError::labeled_error( diff --git a/crates/nu-command/src/commands/dataframe/series/get_day.rs b/crates/nu-command/src/commands/dataframe/series/get_day.rs index fa02503d4a..7a2d964056 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_day.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_day.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.day().into_series(); diff --git 
a/crates/nu-command/src/commands/dataframe/series/get_hour.rs b/crates/nu-command/src/commands/dataframe/series/get_hour.rs index 5666899a85..96baef7fdb 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_hour.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_hour.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.hour().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_minute.rs b/crates/nu-command/src/commands/dataframe/series/get_minute.rs index a33653333c..7500404528 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_minute.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_minute.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.minute().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_month.rs b/crates/nu-command/src/commands/dataframe/series/get_month.rs index 9998173771..dba70abef8 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_month.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_month.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.month().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs b/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs index 5d0250cc09..39f89af772 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs +++ 
b/crates/nu-command/src/commands/dataframe/series/get_nanosecond.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.nanosecond().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs b/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs index a18424e62d..af8537d382 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_ordinal.rs @@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.ordinal().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_second.rs b/crates/nu-command/src/commands/dataframe/series/get_second.rs index 26cebaa5a6..13984dbef5 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_second.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_second.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.second().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_week.rs b/crates/nu-command/src/commands/dataframe/series/get_week.rs index 06f4bba398..4d51a470c8 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_week.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_week.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, 
None))?; let res = casted.week().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_weekday.rs b/crates/nu-command/src/commands/dataframe/series/get_weekday.rs index 8c28feaf1c..75cc997541 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_weekday.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_weekday.rs @@ -53,7 +53,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.weekday().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/get_year.rs b/crates/nu-command/src/commands/dataframe/series/get_year.rs index 7c8331a677..3a03e4ef17 100644 --- a/crates/nu-command/src/commands/dataframe/series/get_year.rs +++ b/crates/nu-command/src/commands/dataframe/series/get_year.rs @@ -56,7 +56,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.year().into_series(); diff --git a/crates/nu-command/src/commands/dataframe/series/rolling.rs b/crates/nu-command/src/commands/dataframe/series/rolling.rs index 316473d84f..8a37242b77 100644 --- a/crates/nu-command/src/commands/dataframe/series/rolling.rs +++ b/crates/nu-command/src/commands/dataframe/series/rolling.rs @@ -6,7 +6,7 @@ use nu_protocol::{ Signature, SyntaxShape, UntaggedValue, }; use nu_source::Tagged; -use polars::prelude::DataType; +use polars::prelude::{DataType, RollingOptions}; enum RollType { Min, @@ -57,7 +57,6 @@ impl WholeStreamCommand for DataFrame { Signature::build("dataframe rolling") .required("type", SyntaxShape::String, "rolling operation") .required("window", SyntaxShape::Int, "Window size for rolling") - .switch("ignore_nulls", "Ignore nulls in column", Some('i')) } fn run(&self, args: 
CommandArgs) -> Result { @@ -112,7 +111,6 @@ fn command(mut args: CommandArgs) -> Result { let tag = args.call_info.name_tag.clone(); let roll_type: Tagged = args.req(0)?; let window_size: Tagged = args.req(1)?; - let ignore_nulls = args.has_flag("ignore_nulls"); let (df, df_tag) = NuDataFrame::try_from_stream(&mut args.input, &tag.span)?; let series = df.as_series(&df_tag.span)?; @@ -126,31 +124,17 @@ fn command(mut args: CommandArgs) -> Result { } let roll_type = RollType::from_str(&roll_type.item, &roll_type.tag.span)?; + let rolling_opts = RollingOptions { + window_size: window_size.item as usize, + min_periods: window_size.item as usize, + weights: None, + center: false, + }; let res = match roll_type { - RollType::Max => series.rolling_max( - window_size.item as u32, - None, - ignore_nulls, - window_size.item as u32, - ), - RollType::Min => series.rolling_min( - window_size.item as u32, - None, - ignore_nulls, - window_size.item as u32, - ), - RollType::Sum => series.rolling_sum( - window_size.item as u32, - None, - ignore_nulls, - window_size.item as u32, - ), - RollType::Mean => series.rolling_mean( - window_size.item as u32, - None, - ignore_nulls, - window_size.item as u32, - ), + RollType::Max => series.rolling_max(rolling_opts), + RollType::Min => series.rolling_min(rolling_opts), + RollType::Sum => series.rolling_sum(rolling_opts), + RollType::Mean => series.rolling_mean(rolling_opts), }; let mut res = res.map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; diff --git a/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs b/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs index ff582d5cda..e541be455b 100644 --- a/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs +++ b/crates/nu-command/src/commands/dataframe/series/set_with_idx.rs @@ -78,7 +78,7 @@ fn command(mut args: CommandArgs) -> Result { let casted = match indices.dtype() { DataType::UInt32 | DataType::UInt64 | DataType::Int32 | 
DataType::Int64 => indices .as_ref() - .cast_with_dtype(&DataType::UInt32) + .cast(&DataType::UInt32) .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)), _ => Err(ShellError::labeled_error_with_secondary( "Incorrect type", diff --git a/crates/nu-command/src/commands/dataframe/series/strftime.rs b/crates/nu-command/src/commands/dataframe/series/strftime.rs index 1c8c8231ea..8be3f31fb7 100644 --- a/crates/nu-command/src/commands/dataframe/series/strftime.rs +++ b/crates/nu-command/src/commands/dataframe/series/strftime.rs @@ -58,7 +58,7 @@ fn command(mut args: CommandArgs) -> Result { let series = df.as_series(&df_tag.span)?; let casted = series - .date64() + .datetime() .map_err(|e| parse_polars_error::<&str>(&e, &df_tag.span, None))?; let res = casted.strftime(&fmt.item).into_series(); diff --git a/crates/nu-command/src/commands/dataframe/take.rs b/crates/nu-command/src/commands/dataframe/take.rs index 293fab526d..1373d8c7db 100644 --- a/crates/nu-command/src/commands/dataframe/take.rs +++ b/crates/nu-command/src/commands/dataframe/take.rs @@ -92,7 +92,7 @@ fn command(mut args: CommandArgs) -> Result { let casted = match series.dtype() { DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => series .as_ref() - .cast_with_dtype(&DataType::UInt32) + .cast(&DataType::UInt32) .map_err(|e| parse_polars_error::<&str>(&e, &value.tag.span, None)), _ => Err(ShellError::labeled_error_with_secondary( "Incorrect type", diff --git a/crates/nu-command/src/commands/dataframe/to_csv.rs b/crates/nu-command/src/commands/dataframe/to_csv.rs index e62d766dfe..d0d9a600af 100644 --- a/crates/nu-command/src/commands/dataframe/to_csv.rs +++ b/crates/nu-command/src/commands/dataframe/to_csv.rs @@ -73,9 +73,9 @@ fn command(mut args: CommandArgs) -> Result { let writer = CsvWriter::new(&mut file); let writer = if no_header { - writer.has_headers(false) + writer.has_header(false) } else { - writer.has_headers(true) + writer.has_header(true) }; let writer = 
match delimiter { diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index 0a1242b4c1..a98774b2bd 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -27,9 +27,9 @@ serde = { version="1.0", features=["derive"] } serde_bytes = "0.11.5" [dependencies.polars] -version = "0.16.0" +version = "0.17.0" optional = true -features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-duration-ns"] +features = ["default", "serde", "rows", "strings", "checked_arithmetic", "object", "dtype-date", "dtype-datetime", "dtype-time"] [features] dataframe = ["polars"] diff --git a/crates/nu-protocol/src/dataframe/compute_between.rs b/crates/nu-protocol/src/dataframe/compute_between.rs index fa5e02849a..5a0c43b522 100644 --- a/crates/nu-protocol/src/dataframe/compute_between.rs +++ b/crates/nu-protocol/src/dataframe/compute_between.rs @@ -603,7 +603,7 @@ where { match series.dtype() { DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { - let to_i64 = series.cast_with_dtype(&DataType::Int64); + let to_i64 = series.cast(&DataType::Int64); match to_i64 { Ok(series) => { @@ -661,7 +661,7 @@ where { match series.dtype() { DataType::Float32 => { - let to_f64 = series.cast_with_dtype(&DataType::Float64); + let to_f64 = series.cast(&DataType::Float64); match to_f64 { Ok(series) => { @@ -731,7 +731,7 @@ where { match series.dtype() { DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { - let to_i64 = series.cast_with_dtype(&DataType::Int64); + let to_i64 = series.cast(&DataType::Int64); match to_i64 { Ok(series) => { @@ -789,7 +789,7 @@ where { match series.dtype() { DataType::Float32 => { - let to_f64 = series.cast_with_dtype(&DataType::Float64); + let to_f64 = series.cast(&DataType::Float64); match to_f64 { Ok(series) => { diff --git a/crates/nu-protocol/src/dataframe/conversion.rs b/crates/nu-protocol/src/dataframe/conversion.rs index a3b8263a25..6212c4ffae 100644 --- 
a/crates/nu-protocol/src/dataframe/conversion.rs +++ b/crates/nu-protocol/src/dataframe/conversion.rs @@ -8,8 +8,8 @@ use nu_errors::ShellError; use nu_source::{Span, Tag}; use num_bigint::BigInt; use polars::prelude::{ - DataFrame, DataType, Date64Type, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, ObjectType, - PolarsNumericType, Series, TimeUnit, + DataFrame, DataType, DatetimeChunked, Int64Type, IntoSeries, NamedFrom, NewChunkedArray, + ObjectType, PolarsNumericType, Series, }; use std::ops::{Deref, DerefMut}; @@ -310,8 +310,8 @@ pub fn create_column( } } } - DataType::Date32 => { - let casted = series.date32().map_err(|e| { + DataType::Date => { + let casted = series.date().map_err(|e| { ShellError::labeled_error( "Casting error", format!("casting error: {}", e), @@ -347,8 +347,8 @@ pub fn create_column( Ok(Column::new(casted.name().into(), values)) } - DataType::Date64 => { - let casted = series.date64().map_err(|e| { + DataType::Datetime => { + let casted = series.datetime().map_err(|e| { ShellError::labeled_error( "Casting error", format!("casting error: {}", e), @@ -384,8 +384,8 @@ pub fn create_column( Ok(Column::new(casted.name().into(), values)) } - DataType::Time64(timeunit) | DataType::Duration(timeunit) => { - let casted = series.time64_nanosecond().map_err(|e| { + DataType::Time => { + let casted = series.time().map_err(|e| { ShellError::labeled_error( "Casting error", format!("casting error: {}", e), @@ -398,14 +398,7 @@ pub fn create_column( .skip(from_row) .take(size) .map(|v| match v { - Some(a) => { - let nanoseconds = match timeunit { - TimeUnit::Second => a / 1_000_000_000, - TimeUnit::Millisecond => a / 1_000_000, - TimeUnit::Microsecond => a / 1_000, - TimeUnit::Nanosecond => a, - }; - + Some(nanoseconds) => { let untagged = if let Some(bigint) = BigInt::from_i64(nanoseconds) { UntaggedValue::Primitive(Primitive::Duration(bigint)) } else { @@ -633,7 +626,8 @@ pub fn from_parsed_columns( } }); - let res = 
ChunkedArray::<Date64Type>::new_from_opt_iter(&name, it); + let res: DatetimeChunked = + ChunkedArray::<Int64Type>::new_from_opt_iter(&name, it).into(); df_series.push(res.into_series()) } diff --git a/crates/nu-protocol/src/dataframe/nu_dataframe.rs b/crates/nu-protocol/src/dataframe/nu_dataframe.rs index e3d8288857..d5fcf41810 100644 --- a/crates/nu-protocol/src/dataframe/nu_dataframe.rs +++ b/crates/nu-protocol/src/dataframe/nu_dataframe.rs @@ -87,7 +87,7 @@ impl PartialEq for NuDataFrame { // Casting needed to compare other numeric types with nushell numeric type. // In nushell we only have i64 integer numeric types and any array created // with nushell untagged primitives will be of type i64 - DataType::UInt32 => match self_series.cast_with_dtype(&DataType::Int64) { + DataType::UInt32 => match self_series.cast(&DataType::Int64) { Ok(series) => series, Err(_) => return false, },