Groupby operations on dataframes (#3473)
* Added PolarsStruct enum to implement groupby * template groupby * groupby operation on dataframes
This commit is contained in:
parent
5ab4199d71
commit
e335e4fddc
83
Cargo.lock
generated
83
Cargo.lock
generated
|
@ -211,8 +211,9 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrow"
|
name = "arrow"
|
||||||
version = "4.0.0-SNAPSHOT"
|
version = "4.1.0"
|
||||||
source = "git+https://github.com/apache/arrow-rs?rev=d008f31b107c1030a1f5144c164e8ca8bf543576#d008f31b107c1030a1f5144c164e8ca8bf543576"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "20cdf087304d5cdd743abd621b4b1b388848d29491932dae6f676ec89ebda0ae"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg_aliases",
|
"cfg_aliases",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
@ -223,7 +224,7 @@ dependencies = [
|
||||||
"lazy_static 1.4.0",
|
"lazy_static 1.4.0",
|
||||||
"lexical-core",
|
"lexical-core",
|
||||||
"multiversion",
|
"multiversion",
|
||||||
"num 0.3.1",
|
"num 0.4.0",
|
||||||
"rand 0.7.3",
|
"rand 0.7.3",
|
||||||
"regex 1.5.3",
|
"regex 1.5.3",
|
||||||
"serde 1.0.125",
|
"serde 1.0.125",
|
||||||
|
@ -442,12 +443,6 @@ dependencies = [
|
||||||
"safemem 0.3.3",
|
"safemem 0.3.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "base64"
|
|
||||||
version = "0.12.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "base64"
|
name = "base64"
|
||||||
version = "0.13.0"
|
version = "0.13.0"
|
||||||
|
@ -4091,20 +4086,6 @@ dependencies = [
|
||||||
"num-traits 0.2.14",
|
"num-traits 0.2.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "num"
|
|
||||||
version = "0.3.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8b7a8e9be5e039e2ff869df49155f1c06bd01ade2117ec783e56ab0932b67a8f"
|
|
||||||
dependencies = [
|
|
||||||
"num-bigint 0.3.2",
|
|
||||||
"num-complex 0.3.1",
|
|
||||||
"num-integer",
|
|
||||||
"num-iter",
|
|
||||||
"num-rational 0.3.2",
|
|
||||||
"num-traits 0.2.14",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num"
|
name = "num"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
|
@ -4163,15 +4144,6 @@ dependencies = [
|
||||||
"num-traits 0.2.14",
|
"num-traits 0.2.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "num-complex"
|
|
||||||
version = "0.3.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "747d632c0c558b87dbabbe6a82f3b4ae03720d0646ac5b7b4dae89394be5f2c5"
|
|
||||||
dependencies = [
|
|
||||||
"num-traits 0.2.14",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-complex"
|
name = "num-complex"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
|
@ -4232,7 +4204,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07"
|
checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
"num-bigint 0.3.2",
|
|
||||||
"num-integer",
|
"num-integer",
|
||||||
"num-traits 0.2.14",
|
"num-traits 0.2.14",
|
||||||
]
|
]
|
||||||
|
@ -4457,17 +4428,18 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parquet"
|
name = "parquet"
|
||||||
version = "4.0.0-SNAPSHOT"
|
version = "4.1.0"
|
||||||
source = "git+https://github.com/apache/arrow-rs?rev=d008f31b107c1030a1f5144c164e8ca8bf543576#d008f31b107c1030a1f5144c164e8ca8bf543576"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "193b8db290021fa28a6447df8f433e39b3caab20ee08b874d0a5c1c34aef68de"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"base64 0.12.3",
|
"base64 0.13.0",
|
||||||
"brotli",
|
"brotli",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"chrono",
|
"chrono",
|
||||||
"flate2",
|
"flate2",
|
||||||
"lz4",
|
"lz4",
|
||||||
"num-bigint 0.3.2",
|
"num-bigint 0.4.0",
|
||||||
"parquet-format",
|
"parquet-format",
|
||||||
"snap",
|
"snap",
|
||||||
"thrift",
|
"thrift",
|
||||||
|
@ -4696,8 +4668,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars"
|
name = "polars"
|
||||||
version = "0.13.1"
|
version = "0.13.3"
|
||||||
source = "git+https://github.com/ritchie46/polars?rev=3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0#3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bc4e2e025126632e8e19d53cd9b655da344bd4942ba603ad246c7776b6401844"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"polars-core",
|
"polars-core",
|
||||||
"polars-io",
|
"polars-io",
|
||||||
|
@ -4706,8 +4679,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-arrow"
|
name = "polars-arrow"
|
||||||
version = "0.13.0"
|
version = "0.13.3"
|
||||||
source = "git+https://github.com/ritchie46/polars?rev=3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0#3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c3534c76a7bafaca9c783506a1f331ad746621d3808ab2407c02ffadd9e99326"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrow",
|
"arrow",
|
||||||
"num 0.4.0",
|
"num 0.4.0",
|
||||||
|
@ -4716,8 +4690,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-core"
|
name = "polars-core"
|
||||||
version = "0.13.2"
|
version = "0.13.3"
|
||||||
source = "git+https://github.com/ritchie46/polars?rev=3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0#3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ad76c4d55017da2d0f8930b0caa327d12286c1e4407469f361e84fad176f9601"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.7.2",
|
"ahash 0.7.2",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
@ -4739,8 +4714,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-io"
|
name = "polars-io"
|
||||||
version = "0.13.0"
|
version = "0.13.3"
|
||||||
source = "git+https://github.com/ritchie46/polars?rev=3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0#3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "07f20f27363d85f847a2b7e9d1bfd426bff18680691dd42ff17ca91893f12f89"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.7.2",
|
"ahash 0.7.2",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
@ -4762,8 +4738,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "polars-lazy"
|
name = "polars-lazy"
|
||||||
version = "0.13.0"
|
version = "0.13.3"
|
||||||
source = "git+https://github.com/ritchie46/polars?rev=3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0#3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "595906f951bacf223625ed6b0e4e73153eb9e251850bb2f9c36d78828334f32b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ahash 0.7.2",
|
"ahash 0.7.2",
|
||||||
"itertools",
|
"itertools",
|
||||||
|
@ -7427,18 +7404,18 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zstd"
|
name = "zstd"
|
||||||
version = "0.7.0+zstd.1.4.9"
|
version = "0.8.1+zstd.1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9428752481d8372e15b1bf779ea518a179ad6c771cca2d2c60e4fbff3cc2cd52"
|
checksum = "357d6bb1bd9c6f6a55a5a15c74d01260b272f724dc60cc829b86ebd2172ac5ef"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"zstd-safe",
|
"zstd-safe",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zstd-safe"
|
name = "zstd-safe"
|
||||||
version = "3.1.0+zstd.1.4.9"
|
version = "4.1.0+zstd.1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5aa1926623ad7fe406e090555387daf73db555b948134b4d73eac5eb08fb666d"
|
checksum = "d30375f78e185ca4c91930f42ea2c0162f9aa29737032501f93b79266d985ae7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"zstd-sys",
|
"zstd-sys",
|
||||||
|
@ -7446,9 +7423,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zstd-sys"
|
name = "zstd-sys"
|
||||||
version = "1.5.0+zstd.1.4.9"
|
version = "1.6.0+zstd.1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4e6c094340240369025fc6b731b054ee2a834328fa584310ac96aa4baebdc465"
|
checksum = "2141bed8922b427761470e6bbfeff255da94fa20b0bbeab0d9297fcaf71e3aa7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cc",
|
"cc",
|
||||||
"libc",
|
"libc",
|
||||||
|
|
|
@ -99,12 +99,7 @@ uuid_crate = { package = "uuid", version = "0.8.2", features = ["v4"], optional
|
||||||
which = { version = "4.1.0", optional = true }
|
which = { version = "4.1.0", optional = true }
|
||||||
zip = { version = "0.5.9", optional = true }
|
zip = { version = "0.5.9", optional = true }
|
||||||
|
|
||||||
[dependencies.polars]
|
polars = { version = "0.13.3",optional = true, features = ["parquet", "json"] }
|
||||||
version = "0.13.1"
|
|
||||||
git = "https://github.com/ritchie46/polars"
|
|
||||||
rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0"
|
|
||||||
optional = true
|
|
||||||
features = ["parquet", "json"]
|
|
||||||
|
|
||||||
[target.'cfg(unix)'.dependencies]
|
[target.'cfg(unix)'.dependencies]
|
||||||
umask = "1.0.0"
|
umask = "1.0.0"
|
||||||
|
|
|
@ -187,7 +187,7 @@ pub(crate) mod touch;
|
||||||
pub(crate) use all::Command as All;
|
pub(crate) use all::Command as All;
|
||||||
pub(crate) use any::Command as Any;
|
pub(crate) use any::Command as Any;
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
pub(crate) use dataframe::{Dataframe, DataframeList, DataframeLoad};
|
pub(crate) use dataframe::{DataFrame, DataFrameGroupBy, DataFrameList, DataFrameLoad};
|
||||||
pub(crate) use enter::Enter;
|
pub(crate) use enter::Enter;
|
||||||
pub(crate) use every::Every;
|
pub(crate) use every::Every;
|
||||||
pub(crate) use exec::Exec;
|
pub(crate) use exec::Exec;
|
||||||
|
|
|
@ -236,7 +236,7 @@ pub fn autoview(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
}
|
}
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
Value {
|
Value {
|
||||||
value: UntaggedValue::Dataframe(df),
|
value: UntaggedValue::DataFrame(df),
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
if let Some(table) = table {
|
if let Some(table) = table {
|
||||||
|
|
|
@ -23,7 +23,7 @@ impl WholeStreamCommand for Command {
|
||||||
let args = args.evaluate_once()?;
|
let args = args.evaluate_once()?;
|
||||||
|
|
||||||
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
|
let df = NuDataFrame::try_from_iter(args.input, &tag)?;
|
||||||
let init = InputStream::one(UntaggedValue::Dataframe(df).into_value(&tag));
|
let init = InputStream::one(UntaggedValue::DataFrame(df).into_value(&tag));
|
||||||
|
|
||||||
Ok(init.to_output_stream())
|
Ok(init.to_output_stream())
|
||||||
}
|
}
|
||||||
|
|
262
crates/nu-command/src/commands/dataframe/groupby.rs
Normal file
262
crates/nu-command/src/commands/dataframe/groupby.rs
Normal file
|
@ -0,0 +1,262 @@
|
||||||
|
use crate::prelude::*;
|
||||||
|
use nu_engine::WholeStreamCommand;
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::{
|
||||||
|
dataframe::NuDataFrame, Primitive, Signature, SyntaxShape, UntaggedValue, Value,
|
||||||
|
};
|
||||||
|
use nu_source::Tagged;
|
||||||
|
use polars::frame::groupby::GroupBy;
|
||||||
|
|
||||||
|
enum Operation {
|
||||||
|
Mean,
|
||||||
|
Sum,
|
||||||
|
Min,
|
||||||
|
Max,
|
||||||
|
First,
|
||||||
|
Last,
|
||||||
|
Nunique,
|
||||||
|
Quantile(f64),
|
||||||
|
Median,
|
||||||
|
//Var,
|
||||||
|
//Std,
|
||||||
|
Count,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Operation {
|
||||||
|
fn from_tagged(
|
||||||
|
name: &Tagged<String>,
|
||||||
|
quantile: Option<Tagged<f64>>,
|
||||||
|
) -> Result<Operation, ShellError> {
|
||||||
|
match name.item.as_ref() {
|
||||||
|
"mean" => Ok(Operation::Mean),
|
||||||
|
"sum" => Ok(Operation::Sum),
|
||||||
|
"min" => Ok(Operation::Min),
|
||||||
|
"max" => Ok(Operation::Max),
|
||||||
|
"first" => Ok(Operation::First),
|
||||||
|
"last" => Ok(Operation::Last),
|
||||||
|
"nunique" => Ok(Operation::Nunique),
|
||||||
|
"quantile" => {
|
||||||
|
match quantile {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"Quantile value not fount",
|
||||||
|
"Quantile operation requires quantile value",
|
||||||
|
&name.tag,
|
||||||
|
)),
|
||||||
|
Some(value ) => {
|
||||||
|
if (value.item < 0.0) | (value.item > 1.0) {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"Inappropriate quantile",
|
||||||
|
"Quantile value should be between 0.0 and 1.0",
|
||||||
|
&value.tag,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(Operation::Quantile(value.item))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"median" => Ok(Operation::Median),
|
||||||
|
//"var" => Ok(Operation::Var),
|
||||||
|
//"std" => Ok(Operation::Std),
|
||||||
|
"count" => Ok(Operation::Count),
|
||||||
|
_ => Err(ShellError::labeled_error_with_secondary(
|
||||||
|
"Operation not fount",
|
||||||
|
"Operation does not exist",
|
||||||
|
&name.tag,
|
||||||
|
"Perhaps you want: mean, sum, min, max, first, last, nunique, quantile, median, count",
|
||||||
|
&name.tag,
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DataFrame;
|
||||||
|
|
||||||
|
impl WholeStreamCommand for DataFrame {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"dataframe groupby"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Creates a groupby operation on a dataframe"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("dataframe groupby")
|
||||||
|
.required("columns", SyntaxShape::Table, "groupby columns")
|
||||||
|
.required(
|
||||||
|
"aggregation columns",
|
||||||
|
SyntaxShape::Table,
|
||||||
|
"columns to perform aggregation",
|
||||||
|
)
|
||||||
|
.required("operation", SyntaxShape::String, "aggregate operation")
|
||||||
|
.named(
|
||||||
|
"quantile",
|
||||||
|
SyntaxShape::Number,
|
||||||
|
"auantile value for quantile operation",
|
||||||
|
Some('q'),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
groupby(args)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![Example {
|
||||||
|
description: "",
|
||||||
|
example: "",
|
||||||
|
result: None,
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn groupby(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
|
let tag = args.call_info.name_tag.clone();
|
||||||
|
let mut args = args.evaluate_once()?;
|
||||||
|
|
||||||
|
let quantile: Option<Tagged<f64>> = args.get_flag("quantile")?;
|
||||||
|
let operation: Tagged<String> = args.req(2)?;
|
||||||
|
let op = Operation::from_tagged(&operation, quantile)?;
|
||||||
|
|
||||||
|
// Extracting the names of the columns to perform the groupby
|
||||||
|
let columns: Vec<Value> = args.req(0)?;
|
||||||
|
|
||||||
|
// Extracting the first tag from the groupby column names
|
||||||
|
let mut col_span = match columns
|
||||||
|
.iter()
|
||||||
|
.nth(0)
|
||||||
|
.map(|v| Span::new(v.tag.span.start(), v.tag.span.end()))
|
||||||
|
{
|
||||||
|
Some(span) => span,
|
||||||
|
None => {
|
||||||
|
return Err(ShellError::labeled_error(
|
||||||
|
"Empty groupby names list",
|
||||||
|
"Empty list for groupby column names",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let columns_string = columns
|
||||||
|
.into_iter()
|
||||||
|
.map(|value| match value.value {
|
||||||
|
UntaggedValue::Primitive(Primitive::String(s)) => {
|
||||||
|
col_span = col_span.until(value.tag.span);
|
||||||
|
Ok(s)
|
||||||
|
}
|
||||||
|
_ => Err(ShellError::labeled_error(
|
||||||
|
"Incorrect column format",
|
||||||
|
"Only string as column name",
|
||||||
|
&value.tag,
|
||||||
|
)),
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<String>, _>>()?;
|
||||||
|
|
||||||
|
// Extracting the names of the columns to perform the aggregation
|
||||||
|
let agg_cols: Vec<Value> = args.req(1)?;
|
||||||
|
|
||||||
|
// Extracting the first tag from the aggregation column names
|
||||||
|
let mut agg_span = match agg_cols
|
||||||
|
.iter()
|
||||||
|
.nth(0)
|
||||||
|
.map(|v| Span::new(v.tag.span.start(), v.tag.span.end()))
|
||||||
|
{
|
||||||
|
Some(span) => span,
|
||||||
|
None => {
|
||||||
|
return Err(ShellError::labeled_error(
|
||||||
|
"Empty aggregation names list",
|
||||||
|
"Empty list for aggregation column names",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let aggregation_string = agg_cols
|
||||||
|
.into_iter()
|
||||||
|
.map(|value| match value.value {
|
||||||
|
UntaggedValue::Primitive(Primitive::String(s)) => {
|
||||||
|
agg_span = agg_span.until(value.tag.span);
|
||||||
|
Ok(s)
|
||||||
|
}
|
||||||
|
_ => Err(ShellError::labeled_error(
|
||||||
|
"Incorrect column format",
|
||||||
|
"Only string as column name",
|
||||||
|
value.tag,
|
||||||
|
)),
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<String>, _>>()?;
|
||||||
|
|
||||||
|
// The operation is only done in one dataframe. Only one input is
|
||||||
|
// expected from the InputStream
|
||||||
|
match args.input.next() {
|
||||||
|
None => Err(ShellError::labeled_error(
|
||||||
|
"No input received",
|
||||||
|
"missing dataframe input from stream",
|
||||||
|
&tag,
|
||||||
|
)),
|
||||||
|
Some(value) => {
|
||||||
|
if let UntaggedValue::DataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(df),
|
||||||
|
..
|
||||||
|
}) = value.value
|
||||||
|
{
|
||||||
|
let groupby = df
|
||||||
|
.groupby(&columns_string)
|
||||||
|
.map_err(|e| {
|
||||||
|
ShellError::labeled_error("Groupby error", format!("{}", e), col_span)
|
||||||
|
})?
|
||||||
|
.select(&aggregation_string);
|
||||||
|
|
||||||
|
let res = perform_aggregation(groupby, op, &operation.tag, &agg_span)?;
|
||||||
|
|
||||||
|
let final_df = Value {
|
||||||
|
tag,
|
||||||
|
value: UntaggedValue::DataFrame(NuDataFrame {
|
||||||
|
dataframe: Some(res),
|
||||||
|
name: "agg result".to_string(),
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(OutputStream::one(final_df))
|
||||||
|
} else {
|
||||||
|
Err(ShellError::labeled_error(
|
||||||
|
"No dataframe in stream",
|
||||||
|
"no dataframe found in input stream",
|
||||||
|
&tag,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn perform_aggregation(
|
||||||
|
groupby: GroupBy,
|
||||||
|
operation: Operation,
|
||||||
|
operation_tag: &Tag,
|
||||||
|
agg_span: &Span,
|
||||||
|
) -> Result<polars::prelude::DataFrame, ShellError> {
|
||||||
|
match operation {
|
||||||
|
Operation::Mean => groupby.mean(),
|
||||||
|
Operation::Sum => groupby.sum(),
|
||||||
|
Operation::Min => groupby.min(),
|
||||||
|
Operation::Max => groupby.max(),
|
||||||
|
Operation::First => groupby.first(),
|
||||||
|
Operation::Last => groupby.last(),
|
||||||
|
Operation::Nunique => groupby.n_unique(),
|
||||||
|
Operation::Quantile(quantile) => groupby.quantile(quantile),
|
||||||
|
Operation::Median => groupby.median(),
|
||||||
|
//Operation::Var => groupby.var(),
|
||||||
|
//Operation::Std => groupby.std(),
|
||||||
|
Operation::Count => groupby.count(),
|
||||||
|
}
|
||||||
|
.map_err(|e| {
|
||||||
|
let span = if e.to_string().contains("Not found") {
|
||||||
|
agg_span
|
||||||
|
} else {
|
||||||
|
&operation_tag.span
|
||||||
|
};
|
||||||
|
|
||||||
|
ShellError::labeled_error("Aggregation error", format!("{}", e), span)
|
||||||
|
})
|
||||||
|
}
|
|
@ -3,9 +3,9 @@ use nu_engine::WholeStreamCommand;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{Signature, TaggedDictBuilder, UntaggedValue, Value};
|
use nu_protocol::{Signature, TaggedDictBuilder, UntaggedValue, Value};
|
||||||
|
|
||||||
pub struct Dataframe;
|
pub struct DataFrame;
|
||||||
|
|
||||||
impl WholeStreamCommand for Dataframe {
|
impl WholeStreamCommand for DataFrame {
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
"dataframe list"
|
"dataframe list"
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@ impl WholeStreamCommand for Dataframe {
|
||||||
|
|
||||||
let mut dataframes: Vec<Value> = Vec::new();
|
let mut dataframes: Vec<Value> = Vec::new();
|
||||||
for (name, value) in args.context.scope.get_vars() {
|
for (name, value) in args.context.scope.get_vars() {
|
||||||
if let UntaggedValue::Dataframe(df) = value.value {
|
if let UntaggedValue::DataFrame(df) = value.value {
|
||||||
let mut data = TaggedDictBuilder::new(value.tag);
|
let mut data = TaggedDictBuilder::new(value.tag);
|
||||||
|
|
||||||
let polars_df = df.dataframe.unwrap();
|
let polars_df = df.dataframe.unwrap();
|
||||||
|
|
|
@ -11,9 +11,9 @@ use nu_source::Tagged;
|
||||||
use polars::prelude::{CsvReader, JsonReader, ParquetReader, SerReader};
|
use polars::prelude::{CsvReader, JsonReader, ParquetReader, SerReader};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
pub struct Dataframe;
|
pub struct DataFrame;
|
||||||
|
|
||||||
impl WholeStreamCommand for Dataframe {
|
impl WholeStreamCommand for DataFrame {
|
||||||
fn name(&self) -> &str {
|
fn name(&self) -> &str {
|
||||||
"dataframe load"
|
"dataframe load"
|
||||||
}
|
}
|
||||||
|
@ -112,7 +112,7 @@ fn create_from_file(args: CommandArgs) -> Result<OutputStream, ShellError> {
|
||||||
name: file_name,
|
name: file_name,
|
||||||
};
|
};
|
||||||
|
|
||||||
let init = InputStream::one(UntaggedValue::Dataframe(nu_dataframe).into_value(&tag));
|
let init = InputStream::one(UntaggedValue::DataFrame(nu_dataframe).into_value(&tag));
|
||||||
|
|
||||||
Ok(init.to_output_stream())
|
Ok(init.to_output_stream())
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
pub mod command;
|
pub mod command;
|
||||||
|
pub mod groupby;
|
||||||
pub mod list;
|
pub mod list;
|
||||||
pub mod load;
|
pub mod load;
|
||||||
|
|
||||||
pub use command::Command as Dataframe;
|
pub use command::Command as DataFrame;
|
||||||
pub use list::Dataframe as DataframeList;
|
pub use groupby::DataFrame as DataFrameGroupBy;
|
||||||
pub use load::Dataframe as DataframeLoad;
|
pub use list::DataFrame as DataFrameList;
|
||||||
|
pub use load::DataFrame as DataFrameLoad;
|
||||||
|
|
|
@ -253,11 +253,13 @@ pub fn create_default_context(interactive: bool) -> Result<EvaluationContext, Bo
|
||||||
whole_stream_command(SeqDates),
|
whole_stream_command(SeqDates),
|
||||||
whole_stream_command(TermSize),
|
whole_stream_command(TermSize),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(Dataframe),
|
whole_stream_command(DataFrame),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(DataframeLoad),
|
whole_stream_command(DataFrameLoad),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
whole_stream_command(DataframeList),
|
whole_stream_command(DataFrameList),
|
||||||
|
#[cfg(feature = "dataframe")]
|
||||||
|
whole_stream_command(DataFrameGroupBy),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
#[cfg(feature = "clipboard-cli")]
|
#[cfg(feature = "clipboard-cli")]
|
||||||
|
|
|
@ -115,7 +115,7 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
|
||||||
serde_json::Value::Null
|
serde_json::Value::Null
|
||||||
}
|
}
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => serde_json::Value::Null,
|
UntaggedValue::DataFrame(_) => serde_json::Value::Null,
|
||||||
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
|
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_json::Value::Array(
|
||||||
b.iter()
|
b.iter()
|
||||||
.map(|x| {
|
.map(|x| {
|
||||||
|
|
|
@ -74,7 +74,7 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
|
||||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||||
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
|
UntaggedValue::DataFrame(_) => toml::Value::String("<Data>".to_string()),
|
||||||
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
||||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||||
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
||||||
|
|
|
@ -96,7 +96,7 @@ pub fn value_to_yaml_value(v: &Value) -> Result<serde_yaml::Value, ShellError> {
|
||||||
serde_yaml::Value::Null
|
serde_yaml::Value::Null
|
||||||
}
|
}
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => serde_yaml::Value::Null,
|
UntaggedValue::DataFrame(_) => serde_yaml::Value::Null,
|
||||||
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
|
UntaggedValue::Primitive(Primitive::Binary(b)) => serde_yaml::Value::Sequence(
|
||||||
b.iter()
|
b.iter()
|
||||||
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))
|
.map(|x| serde_yaml::Value::Number(serde_yaml::Number::from(*x)))
|
||||||
|
|
|
@ -156,9 +156,9 @@ fn uniq(args: CommandArgs) -> Result<ActionStream, ShellError> {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => {
|
UntaggedValue::DataFrame(_) => {
|
||||||
return Err(ShellError::labeled_error(
|
return Err(ShellError::labeled_error(
|
||||||
"uniq -c cannot operate on dataframes.",
|
"uniq -c cannot operate on data structs",
|
||||||
"source",
|
"source",
|
||||||
item.0.tag.span,
|
item.0.tag.span,
|
||||||
))
|
))
|
||||||
|
|
|
@ -46,7 +46,7 @@ pub enum InlineShape {
|
||||||
|
|
||||||
// TODO: Dataframe type
|
// TODO: Dataframe type
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
Dataframe,
|
DataFrame,
|
||||||
|
|
||||||
// Stream markers (used as bookend markers rather than actual values)
|
// Stream markers (used as bookend markers rather than actual values)
|
||||||
BeginningOfStream,
|
BeginningOfStream,
|
||||||
|
@ -130,7 +130,7 @@ impl InlineShape {
|
||||||
UntaggedValue::Error(_) => InlineShape::Error,
|
UntaggedValue::Error(_) => InlineShape::Error,
|
||||||
UntaggedValue::Block(_) => InlineShape::Block,
|
UntaggedValue::Block(_) => InlineShape::Block,
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => InlineShape::Dataframe,
|
UntaggedValue::DataFrame(_) => InlineShape::DataFrame,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -322,7 +322,7 @@ impl PrettyDebug for FormatInlineShape {
|
||||||
InlineShape::Block => DbgDocBldr::opaque("block"),
|
InlineShape::Block => DbgDocBldr::opaque("block"),
|
||||||
InlineShape::Error => DbgDocBldr::error("error"),
|
InlineShape::Error => DbgDocBldr::error("error"),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
InlineShape::Dataframe => DbgDocBldr::error("dataframe_pretty_FormatInlineShape"),
|
InlineShape::DataFrame => DbgDocBldr::error("dataframe_pretty_formatter"),
|
||||||
InlineShape::BeginningOfStream => DbgDocBldr::blank(),
|
InlineShape::BeginningOfStream => DbgDocBldr::blank(),
|
||||||
InlineShape::EndOfStream => DbgDocBldr::blank(),
|
InlineShape::EndOfStream => DbgDocBldr::blank(),
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,7 +117,7 @@ fn helper(v: &Value) -> Result<toml::Value, ShellError> {
|
||||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||||
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
UntaggedValue::Block(_) => toml::Value::String("<Block>".to_string()),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => toml::Value::String("<Dataframe>".to_string()),
|
UntaggedValue::DataFrame(_) => toml::Value::String("<Data>".to_string()),
|
||||||
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
UntaggedValue::Primitive(Primitive::Range(_)) => toml::Value::String("<Range>".to_string()),
|
||||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||||
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
toml::Value::Array(b.iter().map(|x| toml::Value::Integer(*x as i64)).collect())
|
||||||
|
|
|
@ -25,7 +25,7 @@ num-traits = "0.2.14"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
serde_bytes = "0.11.5"
|
serde_bytes = "0.11.5"
|
||||||
|
|
||||||
polars = {version="0.13.1", git = "https://github.com/ritchie46/polars", rev = "3efad9a5c380c64a5eb78b4b7ad257e1e606b9f0", optional = true}
|
polars = {version="0.13.3", optional = true}
|
||||||
|
|
||||||
# implement conversions
|
# implement conversions
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
|
|
|
@ -74,7 +74,7 @@ pub enum Type {
|
||||||
|
|
||||||
/// Dataframe
|
/// Dataframe
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
Dataframe,
|
DataFrame,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A shape representation of the type of a row
|
/// A shape representation of the type of a row
|
||||||
|
@ -191,7 +191,7 @@ impl Type {
|
||||||
UntaggedValue::Error(_) => Type::Error,
|
UntaggedValue::Error(_) => Type::Error,
|
||||||
UntaggedValue::Block(_) => Type::Block,
|
UntaggedValue::Block(_) => Type::Block,
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => Type::Dataframe,
|
UntaggedValue::DataFrame(_) => Type::DataFrame,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -298,7 +298,7 @@ impl PrettyDebug for Type {
|
||||||
}
|
}
|
||||||
Type::Block => ty("block"),
|
Type::Block => ty("block"),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
Type::Dataframe => ty("dataframe_pretty_debug_for_Type"),
|
Type::DataFrame => ty("data_type_formatter"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,9 +51,10 @@ pub enum UntaggedValue {
|
||||||
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
|
/// A block of Nu code, eg `{ ls | get name ; echo "done" }` with its captured values
|
||||||
Block(Box<hir::CapturedBlock>),
|
Block(Box<hir::CapturedBlock>),
|
||||||
|
|
||||||
/// NuDataframe
|
/// Data option that holds the polars structs required to do data
|
||||||
|
/// manipulation and operations using polars dataframes
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
Dataframe(NuDataFrame),
|
DataFrame(NuDataFrame),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UntaggedValue {
|
impl UntaggedValue {
|
||||||
|
@ -671,7 +672,7 @@ impl ShellTypeName for UntaggedValue {
|
||||||
UntaggedValue::Error(_) => "error",
|
UntaggedValue::Error(_) => "error",
|
||||||
UntaggedValue::Block(_) => "block",
|
UntaggedValue::Block(_) => "block",
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => "dataframe",
|
UntaggedValue::DataFrame(_) => "dataframe",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@ impl PrettyDebug for Value {
|
||||||
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
|
UntaggedValue::Error(_) => DbgDocBldr::error("error"),
|
||||||
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
|
UntaggedValue::Block(_) => DbgDocBldr::opaque("block"),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_Value"),
|
UntaggedValue::DataFrame(_) => DbgDocBldr::opaque("dataframe_prettydebug_for_data"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -725,7 +725,7 @@ pub fn get_data<'value>(value: &'value Value, desc: &str) -> MaybeOwned<'value,
|
||||||
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
|
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
|
||||||
}
|
}
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => {
|
UntaggedValue::DataFrame(_) => {
|
||||||
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
|
MaybeOwned::Owned(UntaggedValue::nothing().into_untagged_value())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -401,10 +401,10 @@ pub fn value_to_json_value(v: &Value) -> Result<serde_json::Value, ShellError> {
|
||||||
|
|
||||||
UntaggedValue::Table(l) => serde_json::Value::Array(json_list(l)?),
|
UntaggedValue::Table(l) => serde_json::Value::Array(json_list(l)?),
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => {
|
UntaggedValue::DataFrame(_) => {
|
||||||
return Err(ShellError::labeled_error(
|
return Err(ShellError::labeled_error(
|
||||||
"Cannot convert dataframe",
|
"Cannot convert data struct",
|
||||||
"Cannot convert dataframe",
|
"Cannot convert data struct",
|
||||||
&v.tag,
|
&v.tag,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,7 +64,7 @@ pub fn value_to_bson_value(v: &Value) -> Result<Bson, ShellError> {
|
||||||
),
|
),
|
||||||
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => Bson::Null,
|
UntaggedValue::Block(_) | UntaggedValue::Primitive(Primitive::Range(_)) => Bson::Null,
|
||||||
#[cfg(feature = "dataframe")]
|
#[cfg(feature = "dataframe")]
|
||||||
UntaggedValue::Dataframe(_) => Bson::Null,
|
UntaggedValue::DataFrame(_) => Bson::Null,
|
||||||
UntaggedValue::Error(e) => return Err(e.clone()),
|
UntaggedValue::Error(e) => return Err(e.clone()),
|
||||||
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
UntaggedValue::Primitive(Primitive::Binary(b)) => {
|
||||||
Bson::Binary(BinarySubtype::Generic, b.clone())
|
Bson::Binary(BinarySubtype::Generic, b.clone())
|
||||||
|
|
Loading…
Reference in New Issue
Block a user