nushell/crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs
Ian Manske c747ec75c9
Add command_prelude module (#12291)
# Description
When implementing a `Command`, one must also import all the types
present in the function signatures for `Command`. This makes it so that
we often import the same set of types in each command implementation
file. E.g., something like this:
```rust
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{
    record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData,
    ShellError, Signature, Span, Type, Value,
};
```

This PR adds the `nu_engine::command_prelude` module which contains the
necessary and commonly used types to implement a `Command`:
```rust
// command_prelude.rs
pub use crate::CallExt;
pub use nu_protocol::{
    ast::{Call, CellPath},
    engine::{Command, EngineState, Stack},
    record, Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, IntoSpanned,
    PipelineData, Record, ShellError, Signature, Span, Spanned, SyntaxShape, Type, Value,
};
```

This should reduce the boilerplate needed to implement a command and
also gives us a place to track the breadth of the `Command` API. I tried
to be conservative with what went into the prelude modules, since it
might be hard/annoying to remove items from the prelude in the future.
Let me know if something should be included or excluded.
2024-03-26 21:17:30 +00:00

128 lines
4.2 KiB
Rust

use crate::dataframe::values::NuDataFrame;
use nu_engine::command_prelude::*;
use polars::{prelude::NamedFrom, series::Series};
#[derive(Clone)]
pub struct SampleDF;
impl Command for SampleDF {
fn name(&self) -> &str {
"dfr sample"
}
fn usage(&self) -> &str {
"Create sample dataframe."
}
fn signature(&self) -> Signature {
Signature::build(self.name())
.named(
"n-rows",
SyntaxShape::Int,
"number of rows to be taken from dataframe",
Some('n'),
)
.named(
"fraction",
SyntaxShape::Number,
"fraction of dataframe to be taken",
Some('f'),
)
.named(
"seed",
SyntaxShape::Number,
"seed for the selection",
Some('s'),
)
.switch("replace", "sample with replace", Some('e'))
.switch("shuffle", "shuffle sample", Some('u'))
.input_output_type(
Type::Custom("dataframe".into()),
Type::Custom("dataframe".into()),
)
.category(Category::Custom("dataframe".into()))
}
fn examples(&self) -> Vec<Example> {
vec![
Example {
description: "Sample rows from dataframe",
example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr sample --n-rows 1",
result: None, // No expected value because sampling is random
},
Example {
description: "Shows sample row using fraction and replace",
example:
"[[a b]; [1 2] [3 4] [5 6]] | dfr into-df | dfr sample --fraction 0.5 --replace",
result: None, // No expected value because sampling is random
},
]
}
fn run(
&self,
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
command(engine_state, stack, call, input)
}
}
fn command(
engine_state: &EngineState,
stack: &mut Stack,
call: &Call,
input: PipelineData,
) -> Result<PipelineData, ShellError> {
let rows: Option<Spanned<i64>> = call.get_flag(engine_state, stack, "n-rows")?;
let fraction: Option<Spanned<f64>> = call.get_flag(engine_state, stack, "fraction")?;
let seed: Option<u64> = call
.get_flag::<i64>(engine_state, stack, "seed")?
.map(|val| val as u64);
let replace: bool = call.has_flag(engine_state, stack, "replace")?;
let shuffle: bool = call.has_flag(engine_state, stack, "shuffle")?;
let df = NuDataFrame::try_from_pipeline(input, call.head)?;
match (rows, fraction) {
(Some(rows), None) => df
.as_ref()
.sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed)
.map_err(|e| ShellError::GenericError {
error: "Error creating sample".into(),
msg: e.to_string(),
span: Some(rows.span),
help: None,
inner: vec![],
}),
(None, Some(frac)) => df
.as_ref()
.sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed)
.map_err(|e| ShellError::GenericError {
error: "Error creating sample".into(),
msg: e.to_string(),
span: Some(frac.span),
help: None,
inner: vec![],
}),
(Some(_), Some(_)) => Err(ShellError::GenericError {
error: "Incompatible flags".into(),
msg: "Only one selection criterion allowed".into(),
span: Some(call.head),
help: None,
inner: vec![],
}),
(None, None) => Err(ShellError::GenericError {
error: "No selection".into(),
msg: "No selection criterion was found".into(),
span: Some(call.head),
help: Some("Perhaps you want to use the flag -n or -f".into()),
inner: vec![],
}),
}
.map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None))
}