From b9331d1b0816b3fddfb187028a4f147c835a423e Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Tue, 7 May 2024 12:52:02 +0000 Subject: [PATCH 001/137] Add `sys users` command (#12787) # Description Add a new `sys users` command which returns a table of the users of the system. This is the same table that is currently present as `(sys).host.sessions`. The same table has been removed from the recently added `sys host` command. # User-Facing Changes Adds a new command. (The old `sys` command is left as is.) --- crates/nu-command/src/default_context.rs | 1 + crates/nu-command/src/system/sys/host.rs | 3 +- crates/nu-command/src/system/sys/mod.rs | 48 ++++++++++++----------- crates/nu-command/src/system/sys/sys_.rs | 5 ++- crates/nu-command/src/system/sys/users.rs | 38 ++++++++++++++++++ 5 files changed, 71 insertions(+), 24 deletions(-) create mode 100644 crates/nu-command/src/system/sys/users.rs diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs index d8307be84d..ba55472e15 100644 --- a/crates/nu-command/src/default_context.rs +++ b/crates/nu-command/src/default_context.rs @@ -125,6 +125,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState { SysMem, SysNet, SysTemp, + SysUsers, UName, }; diff --git a/crates/nu-command/src/system/sys/host.rs b/crates/nu-command/src/system/sys/host.rs index 23508b5b6e..969f59ef99 100644 --- a/crates/nu-command/src/system/sys/host.rs +++ b/crates/nu-command/src/system/sys/host.rs @@ -26,7 +26,8 @@ impl Command for SysHost { call: &Call, _input: PipelineData, ) -> Result { - Ok(super::host(call.head).into_pipeline_data()) + let host = super::host(call.head); + Ok(Value::record(host, call.head).into_pipeline_data()) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/system/sys/mod.rs b/crates/nu-command/src/system/sys/mod.rs index 0c61543e7d..5e0467c251 100644 --- a/crates/nu-command/src/system/sys/mod.rs +++ b/crates/nu-command/src/system/sys/mod.rs @@ -5,6 +5,7 @@ mod mem; mod net; mod sys_; mod temp; +mod users; pub use cpu::SysCpu; pub use disks::SysDisks; @@ -13,6 +14,7 @@ pub use mem::SysMem; pub use net::SysNet; pub use sys_::Sys; pub use temp::SysTemp; +pub use users::SysUsers; use chrono::{DateTime, Local}; use nu_protocol::{record, Record, Span, Value}; @@ -122,7 +124,29 @@ pub fn mem(span: Span) -> Value { Value::record(record, span) } -pub fn host(span: Span) -> Value { +pub fn users(span: Span) -> Value { + let users = Users::new_with_refreshed_list() + .iter() + .map(|user| { + let groups = user + .groups() + .iter() + .map(|group| Value::string(trim_cstyle_null(group.name()), span)) + .collect(); + + let record = record! { + "name" => Value::string(trim_cstyle_null(user.name()), span), + "groups" => Value::list(groups, span), + }; + + Value::record(record, span) + }) + .collect(); + + Value::list(users, span) +} + +pub fn host(span: Span) -> Record { let mut record = Record::new(); if let Some(name) = System::name() { @@ -160,27 +184,7 @@ pub fn host(span: Span) -> Value { let timestamp_str = datetime.with_timezone(datetime.offset()).to_rfc3339(); record.push("boot_time", Value::string(timestamp_str, span)); - let users = Users::new_with_refreshed_list() - .iter() - .map(|user| { - let groups = user - .groups() - .iter() - .map(|group| Value::string(trim_cstyle_null(group.name()), span)) - .collect(); - - let record = record! 
{ - "name" => Value::string(trim_cstyle_null(user.name()), span), - "groups" => Value::list(groups, span), - }; - - Value::record(record, span) - }) - .collect(); - - record.push("sessions", Value::list(users, span)); - - Value::record(record, span) + record } pub fn temp(span: Span) -> Value { diff --git a/crates/nu-command/src/system/sys/sys_.rs b/crates/nu-command/src/system/sys/sys_.rs index 39dc2d419b..2886836be9 100644 --- a/crates/nu-command/src/system/sys/sys_.rs +++ b/crates/nu-command/src/system/sys/sys_.rs @@ -43,8 +43,11 @@ impl Command for Sys { ); let head = call.head; + + let mut host = super::host(head); + host.push("sessions", super::users(head)); let record = record! { - "host" => super::host(head), + "host" => Value::record(host, head), "cpu" => super::cpu(head), "disks" => super::disks(head), "mem" => super::mem(head), diff --git a/crates/nu-command/src/system/sys/users.rs b/crates/nu-command/src/system/sys/users.rs new file mode 100644 index 0000000000..9aab2b9b7b --- /dev/null +++ b/crates/nu-command/src/system/sys/users.rs @@ -0,0 +1,38 @@ +use nu_engine::command_prelude::*; + +#[derive(Clone)] +pub struct SysUsers; + +impl Command for SysUsers { + fn name(&self) -> &str { + "sys users" + } + + fn signature(&self) -> Signature { + Signature::build("sys users") + .category(Category::System) + .input_output_types(vec![(Type::Nothing, Type::record())]) + } + + fn usage(&self) -> &str { + "View information about the users on the system." + } + + fn run( + &self, + _engine_state: &EngineState, + _stack: &mut Stack, + call: &Call, + _input: PipelineData, + ) -> Result { + Ok(super::users(call.head).into_pipeline_data()) + } + + fn examples(&self) -> Vec { + vec![Example { + description: "Show info about the system users", + example: "sys users", + result: None, + }] + } +} From 7a86b98f61e16ac4b1264e86d992aed7d83ca897 Mon Sep 17 00:00:00 2001 From: YizhePKU Date: Tue, 7 May 2024 23:17:49 +0800 Subject: [PATCH 002/137] Migrate to a new PWD API (part 2) (#12749) Refer to #12603 for part 1. We need to be careful when migrating to the new API, because the new API has slightly different semantics (PWD can contain symlinks). This PR handles the "obviously safe" part of the migrations. Namely, it handles two specific use cases: * Passing PWD into `canonicalize_with()` * Passing PWD into `EngineState::merge_env()` The first case is safe because symlinks are canonicalized away. The second case is safe because `EngineState::merge_env()` only uses PWD to call `std::env::set_current_dir()`, which shouldn't affact Nushell. The commit message contains detailed stats on the updated files. Because these migrations touch a lot of files, I want to keep these PRs small to avoid merge conflicts. 
--- benches/benchmarks.rs | 3 +- crates/nu-cli/src/config_files.rs | 64 +++++++++---------- crates/nu-cli/src/eval_file.rs | 6 +- crates/nu-cmd-base/src/util.rs | 3 +- .../nu-cmd-plugin/src/commands/plugin/add.rs | 6 +- crates/nu-command/src/filesystem/glob.rs | 6 +- crates/nu-command/src/filesystem/watch.rs | 6 +- crates/nu-engine/src/env.rs | 3 +- crates/nu-protocol/src/engine/engine_state.rs | 14 ++++ crates/nu-std/src/lib.rs | 6 +- src/config_files.rs | 25 ++++---- src/test_bins.rs | 5 +- 12 files changed, 76 insertions(+), 71 deletions(-) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 859e3e2d2d..e291eeebcc 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -23,8 +23,7 @@ fn load_bench_commands() -> EngineState { } fn canonicalize_path(engine_state: &EngineState, path: &Path) -> PathBuf { - #[allow(deprecated)] - let cwd = engine_state.current_work_dir(); + let cwd = engine_state.cwd_as_string(None).unwrap(); if path.exists() { match nu_path::canonicalize_with(path, cwd) { diff --git a/crates/nu-cli/src/config_files.rs b/crates/nu-cli/src/config_files.rs index 775d76382b..091fe7daa3 100644 --- a/crates/nu-cli/src/config_files.rs +++ b/crates/nu-cli/src/config_files.rs @@ -177,36 +177,36 @@ pub fn add_plugin_file( use std::path::Path; let working_set = StateWorkingSet::new(engine_state); - #[allow(deprecated)] - let cwd = working_set.get_cwd(); - if let Some(plugin_file) = plugin_file { - let path = Path::new(&plugin_file.item); - let path_dir = path.parent().unwrap_or(path); - // Just try to canonicalize the directory of the plugin file first. - if let Ok(path_dir) = canonicalize_with(path_dir, &cwd) { - // Try to canonicalize the actual filename, but it's ok if that fails. The file doesn't - // have to exist. - let path = path_dir.join(path.file_name().unwrap_or(path.as_os_str())); - let path = canonicalize_with(&path, &cwd).unwrap_or(path); - engine_state.plugin_path = Some(path) - } else { - // It's an error if the directory for the plugin file doesn't exist. - report_error( - &working_set, - &ParseError::FileNotFound( - path_dir.to_string_lossy().into_owned(), - plugin_file.span, - ), - ); + if let Ok(cwd) = engine_state.cwd_as_string(None) { + if let Some(plugin_file) = plugin_file { + let path = Path::new(&plugin_file.item); + let path_dir = path.parent().unwrap_or(path); + // Just try to canonicalize the directory of the plugin file first. + if let Ok(path_dir) = canonicalize_with(path_dir, &cwd) { + // Try to canonicalize the actual filename, but it's ok if that fails. The file doesn't + // have to exist. + let path = path_dir.join(path.file_name().unwrap_or(path.as_os_str())); + let path = canonicalize_with(&path, &cwd).unwrap_or(path); + engine_state.plugin_path = Some(path) + } else { + // It's an error if the directory for the plugin file doesn't exist. 
+ report_error( + &working_set, + &ParseError::FileNotFound( + path_dir.to_string_lossy().into_owned(), + plugin_file.span, + ), + ); + } + } else if let Some(mut plugin_path) = nu_path::config_dir() { + // Path to store plugins signatures + plugin_path.push(storage_path); + let mut plugin_path = canonicalize_with(&plugin_path, &cwd).unwrap_or(plugin_path); + plugin_path.push(PLUGIN_FILE); + let plugin_path = canonicalize_with(&plugin_path, &cwd).unwrap_or(plugin_path); + engine_state.plugin_path = Some(plugin_path); } - } else if let Some(mut plugin_path) = nu_path::config_dir() { - // Path to store plugins signatures - plugin_path.push(storage_path); - let mut plugin_path = canonicalize_with(&plugin_path, &cwd).unwrap_or(plugin_path); - plugin_path.push(PLUGIN_FILE); - let plugin_path = canonicalize_with(&plugin_path, &cwd).unwrap_or(plugin_path); - engine_state.plugin_path = Some(plugin_path); } } @@ -236,8 +236,7 @@ pub fn eval_config_contents( engine_state.file = prev_file; // Merge the environment in case env vars changed in the config - #[allow(deprecated)] - match nu_engine::env::current_dir(engine_state, stack) { + match engine_state.cwd(Some(stack)) { Ok(cwd) => { if let Err(e) = engine_state.merge_env(stack, cwd) { let working_set = StateWorkingSet::new(engine_state); @@ -274,8 +273,9 @@ pub fn migrate_old_plugin_file(engine_state: &EngineState, storage_path: &str) - let start_time = std::time::Instant::now(); - #[allow(deprecated)] - let cwd = engine_state.current_work_dir(); + let Ok(cwd) = engine_state.cwd_as_string(None) else { + return false; + }; let Some(config_dir) = nu_path::config_dir().and_then(|mut dir| { dir.push(storage_path); diff --git a/crates/nu-cli/src/eval_file.rs b/crates/nu-cli/src/eval_file.rs index 90b1e840ee..8107de71a5 100644 --- a/crates/nu-cli/src/eval_file.rs +++ b/crates/nu-cli/src/eval_file.rs @@ -1,8 +1,7 @@ use crate::util::eval_source; use log::{info, trace}; use miette::{IntoDiagnostic, Result}; -#[allow(deprecated)] -use nu_engine::{convert_env_values, current_dir, eval_block}; +use nu_engine::{convert_env_values, eval_block}; use nu_parser::parse; use nu_path::canonicalize_with; use nu_protocol::{ @@ -30,8 +29,7 @@ pub fn evaluate_file( std::process::exit(1); } - #[allow(deprecated)] - let cwd = current_dir(engine_state, stack)?; + let cwd = engine_state.cwd_as_string(Some(stack))?; let file_path = canonicalize_with(&path, cwd).unwrap_or_else(|e| { let working_set = StateWorkingSet::new(engine_state); diff --git a/crates/nu-cmd-base/src/util.rs b/crates/nu-cmd-base/src/util.rs index 8654975c2b..619237a21c 100644 --- a/crates/nu-cmd-base/src/util.rs +++ b/crates/nu-cmd-base/src/util.rs @@ -13,8 +13,7 @@ pub fn get_init_cwd() -> PathBuf { } pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf { - #[allow(deprecated)] - nu_engine::env::current_dir(engine_state, stack).unwrap_or_else(|e| { + engine_state.cwd(Some(stack)).unwrap_or_else(|e| { let working_set = StateWorkingSet::new(engine_state); report_error(&working_set, &e); crate::util::get_init_cwd() diff --git a/crates/nu-cmd-plugin/src/commands/plugin/add.rs b/crates/nu-cmd-plugin/src/commands/plugin/add.rs index 70f1f417b6..e2c1c31151 100644 --- a/crates/nu-cmd-plugin/src/commands/plugin/add.rs +++ b/crates/nu-cmd-plugin/src/commands/plugin/add.rs @@ -1,5 +1,4 @@ -#[allow(deprecated)] -use nu_engine::{command_prelude::*, current_dir}; +use nu_engine::command_prelude::*; use nu_plugin_engine::{GetPlugin, PersistentPlugin}; use nu_protocol::{PluginGcConfig, 
PluginIdentity, PluginRegistryItem, RegisteredPlugin}; use std::sync::Arc; @@ -82,8 +81,7 @@ apparent the next time `nu` is next launched with that plugin registry file. let filename: Spanned = call.req(engine_state, stack, 0)?; let shell: Option> = call.get_flag(engine_state, stack, "shell")?; - #[allow(deprecated)] - let cwd = current_dir(engine_state, stack)?; + let cwd = engine_state.cwd(Some(stack))?; // Check the current directory, or fall back to NU_PLUGIN_DIRS let filename_expanded = nu_path::locate_in_dirs(&filename.item, &cwd, || { diff --git a/crates/nu-command/src/filesystem/glob.rs b/crates/nu-command/src/filesystem/glob.rs index c5f4bc08b4..b10e8893a0 100644 --- a/crates/nu-command/src/filesystem/glob.rs +++ b/crates/nu-command/src/filesystem/glob.rs @@ -1,5 +1,4 @@ -#[allow(deprecated)] -use nu_engine::{command_prelude::*, env::current_dir}; +use nu_engine::command_prelude::*; use std::sync::{atomic::AtomicBool, Arc}; use wax::{Glob as WaxGlob, WalkBehavior, WalkEntry}; @@ -179,8 +178,7 @@ impl Command for Glob { } }; - #[allow(deprecated)] - let path = current_dir(engine_state, stack)?; + let path = engine_state.cwd_as_string(Some(stack))?; let path = match nu_path::canonicalize_with(prefix, path) { Ok(path) => path, Err(e) if e.to_string().contains("os error 2") => diff --git a/crates/nu-command/src/filesystem/watch.rs b/crates/nu-command/src/filesystem/watch.rs index 224d58d0d0..fda542c8a8 100644 --- a/crates/nu-command/src/filesystem/watch.rs +++ b/crates/nu-command/src/filesystem/watch.rs @@ -5,8 +5,7 @@ use notify_debouncer_full::{ EventKind, RecursiveMode, Watcher, }, }; -#[allow(deprecated)] -use nu_engine::{command_prelude::*, current_dir, ClosureEval}; +use nu_engine::{command_prelude::*, ClosureEval}; use nu_protocol::{ engine::{Closure, StateWorkingSet}, format_error, @@ -74,8 +73,7 @@ impl Command for Watch { _input: PipelineData, ) -> Result { let head = call.head; - #[allow(deprecated)] - let cwd = current_dir(engine_state, stack)?; + let cwd = engine_state.cwd_as_string(Some(stack))?; let path_arg: Spanned = call.req(engine_state, stack, 0)?; let path_no_whitespace = &path_arg diff --git a/crates/nu-engine/src/env.rs b/crates/nu-engine/src/env.rs index 19ed589dcf..ae226c4421 100644 --- a/crates/nu-engine/src/env.rs +++ b/crates/nu-engine/src/env.rs @@ -286,8 +286,7 @@ pub fn find_in_dirs_env( Err(e) => return Err(e), } } else { - #[allow(deprecated)] - current_dir_str(engine_state, stack)? + engine_state.cwd_as_string(Some(stack))? }; let check_dir = |lib_dirs: Option| -> Option { diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 5012754b3b..1593b3341a 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -984,6 +984,20 @@ impl EngineState { } } + /// Like `EngineState::cwd()`, but returns a String instead of a PathBuf for convenience. 
+ pub fn cwd_as_string(&self, stack: Option<&Stack>) -> Result { + let cwd = self.cwd(stack)?; + cwd.into_os_string() + .into_string() + .map_err(|err| ShellError::NonUtf8Custom { + msg: format!( + "The current working directory is not a valid utf-8 string: {:?}", + err + ), + span: Span::unknown(), + }) + } + // TODO: see if we can completely get rid of this pub fn get_file_contents(&self) -> &[CachedFile] { &self.files diff --git a/crates/nu-std/src/lib.rs b/crates/nu-std/src/lib.rs index a3cf9271a5..a20e3fc5da 100644 --- a/crates/nu-std/src/lib.rs +++ b/crates/nu-std/src/lib.rs @@ -1,6 +1,5 @@ use log::trace; -#[allow(deprecated)] -use nu_engine::{env::current_dir, eval_block}; +use nu_engine::eval_block; use nu_parser::parse; use nu_protocol::{ debugger::WithoutDebug, @@ -99,8 +98,7 @@ use std pwd eval_block::(engine_state, &mut stack, &block, pipeline_data)?; - #[allow(deprecated)] - let cwd = current_dir(engine_state, &stack)?; + let cwd = engine_state.cwd(Some(&stack))?; engine_state.merge_env(&mut stack, cwd)?; Ok(()) diff --git a/src/config_files.rs b/src/config_files.rs index e67af7f2e1..6b2e5bb16d 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -31,14 +31,19 @@ pub(crate) fn read_config_file( // Load config startup file if let Some(file) = config_file { let working_set = StateWorkingSet::new(engine_state); - #[allow(deprecated)] - let cwd = working_set.get_cwd(); - if let Ok(path) = canonicalize_with(&file.item, cwd) { - eval_config_contents(path, engine_state, stack); - } else { - let e = ParseError::FileNotFound(file.item, file.span); - report_error(&working_set, &e); + match engine_state.cwd_as_string(Some(stack)) { + Ok(cwd) => { + if let Ok(path) = canonicalize_with(&file.item, cwd) { + eval_config_contents(path, engine_state, stack); + } else { + let e = ParseError::FileNotFound(file.item, file.span); + report_error(&working_set, &e); + } + } + Err(e) => { + report_error(&working_set, &e); + } } } else if let Some(mut config_path) = nu_path::config_dir() { config_path.push(NUSHELL_FOLDER); @@ -144,8 +149,7 @@ pub(crate) fn read_default_env_file(engine_state: &mut EngineState, stack: &mut info!("read_config_file {}:{}:{}", file!(), line!(), column!()); // Merge the environment in case env vars changed in the config - #[allow(deprecated)] - match nu_engine::env::current_dir(engine_state, stack) { + match engine_state.cwd(Some(stack)) { Ok(cwd) => { if let Err(e) = engine_state.merge_env(stack, cwd) { let working_set = StateWorkingSet::new(engine_state); @@ -186,8 +190,7 @@ fn eval_default_config( ); // Merge the environment in case env vars changed in the config - #[allow(deprecated)] - match nu_engine::env::current_dir(engine_state, stack) { + match engine_state.cwd(Some(stack)) { Ok(cwd) => { if let Err(e) = engine_state.merge_env(stack, cwd) { let working_set = StateWorkingSet::new(engine_state); diff --git a/src/test_bins.rs b/src/test_bins.rs index 5fef4976a7..73a760ada1 100644 --- a/src/test_bins.rs +++ b/src/test_bins.rs @@ -249,8 +249,9 @@ pub fn nu_repl() { for (i, line) in source_lines.iter().enumerate() { let mut stack = Stack::with_parent(top_stack.clone()); - #[allow(deprecated)] - let cwd = nu_engine::env::current_dir(&engine_state, &stack) + + let cwd = engine_state + .cwd(Some(&stack)) .unwrap_or_else(|err| outcome_err(&engine_state, &err)); // Before doing anything, merge the environment from the previous REPL iteration into the From cad22bb833cc98373dbe0d88d2f14bde9c481af2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 10:35:08 +0800 Subject: [PATCH 003/137] Bump actions/checkout from 4.1.4 to 4.1.5 (#12804) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/checkout](https://github.com/actions/checkout) from 4.1.4 to 4.1.5.
Full Changelog: https://github.com/actions/checkout/compare/v4.1.4...v4.1.5
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/audit.yml | 2 +- .github/workflows/ci.yml | 8 ++++---- .github/workflows/nightly-build.yml | 8 ++++---- .github/workflows/release.yml | 4 ++-- .github/workflows/typos.yml | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 6d18269e89..e8a7f55817 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -19,7 +19,7 @@ jobs: # Prevent sudden announcement of a new advisory from failing ci: continue-on-error: true steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - uses: rustsec/audit-check@v1.4.1 with: token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5aec3f923a..4815491854 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 @@ -89,7 +89,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 @@ -121,7 +121,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 @@ -174,7 +174,7 @@ jobs: runs-on: ${{ matrix.platform }} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index f86d7572a2..50235922e1 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -27,7 +27,7 @@ jobs: # if: github.repository == 'nushell/nightly' steps: - name: Checkout - uses: actions/checkout@v4.1.4 + uses: actions/checkout@v4.1.5 if: github.repository == 'nushell/nightly' with: ref: main @@ -123,7 +123,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 with: ref: main fetch-depth: 0 @@ -235,7 +235,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 with: ref: main fetch-depth: 0 @@ -310,7 +310,7 @@ jobs: - name: Waiting for Release run: sleep 1800 - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 with: ref: main diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fce38d9b4b..2cc7cbcd9b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -73,7 +73,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - name: Update Rust Toolchain Target run: | @@ -163,7 +163,7 @@ jobs: runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v4.1.4 + - uses: actions/checkout@v4.1.5 - name: Update Rust Toolchain Target run: | diff --git a/.github/workflows/typos.yml b/.github/workflows/typos.yml index a9354ade1d..dacfb83928 100644 --- a/.github/workflows/typos.yml +++ b/.github/workflows/typos.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Actions Repository - uses: 
actions/checkout@v4.1.4 + uses: actions/checkout@v4.1.5 - name: Check spelling uses: crate-ci/typos@v1.21.0 From f851b61cb71dacffdf82d2d6ff7056d3e40f5377 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 10:35:18 +0800 Subject: [PATCH 004/137] Bump softprops/action-gh-release from 2.0.4 to 2.0.5 (#12803) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [softprops/action-gh-release](https://github.com/softprops/action-gh-release) from 2.0.4 to 2.0.5.
Most Recent Ignore Conditions Applied to This Pull Request:

| Dependency Name | Ignore Conditions |
| --- | --- |
| softprops/action-gh-release | [< 0.2, > 0.1.13] |
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/nightly-build.yml | 4 ++-- .github/workflows/release.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 50235922e1..ab9f93d97d 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -174,7 +174,7 @@ jobs: # REF: https://github.com/marketplace/actions/gh-release # Create a release only in nushell/nightly repo - name: Publish Archive - uses: softprops/action-gh-release@v2.0.4 + uses: softprops/action-gh-release@v2.0.5 if: ${{ startsWith(github.repository, 'nushell/nightly') }} with: prerelease: true @@ -286,7 +286,7 @@ jobs: # REF: https://github.com/marketplace/actions/gh-release # Create a release only in nushell/nightly repo - name: Publish Archive - uses: softprops/action-gh-release@v2.0.4 + uses: softprops/action-gh-release@v2.0.5 if: ${{ startsWith(github.repository, 'nushell/nightly') }} with: draft: false diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2cc7cbcd9b..ffe653bd22 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -104,7 +104,7 @@ jobs: # REF: https://github.com/marketplace/actions/gh-release - name: Publish Archive - uses: softprops/action-gh-release@v2.0.4 + uses: softprops/action-gh-release@v2.0.5 if: ${{ startsWith(github.ref, 'refs/tags/') }} with: draft: true @@ -194,7 +194,7 @@ jobs: # REF: https://github.com/marketplace/actions/gh-release - name: Publish Archive - uses: softprops/action-gh-release@v2.0.4 + uses: softprops/action-gh-release@v2.0.5 if: ${{ startsWith(github.ref, 'refs/tags/') }} with: draft: true From e462b6cd990904b770b8282c7c1eefdb3a5d94be Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Tue, 7 May 2024 20:12:32 -0700 Subject: [PATCH 005/137] Make the message when running a plugin exe directly clearer (#12806) # Description This changes the message that shows up when running a plugin executable directly rather than as a plugin to direct the user to run `plugin add --help`, which should have enough information to figure out what's going on. The message previously just vaguely suggested that the user needs to run the plugin "from within Nushell", which is not really enough - it has to be added with `plugin add` to be used as a plugin. Also fix docs for `plugin add` to mention `plugin use` rather than `register` (oops) --- crates/nu-cmd-plugin/src/commands/plugin/add.rs | 3 ++- crates/nu-plugin/src/plugin/mod.rs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/nu-cmd-plugin/src/commands/plugin/add.rs b/crates/nu-cmd-plugin/src/commands/plugin/add.rs index e2c1c31151..225941db01 100644 --- a/crates/nu-cmd-plugin/src/commands/plugin/add.rs +++ b/crates/nu-cmd-plugin/src/commands/plugin/add.rs @@ -43,7 +43,8 @@ impl Command for PluginAdd { fn extra_usage(&self) -> &str { r#" -This does not load the plugin commands into the scope - see `register` for that. +This does not load the plugin commands into the scope - see `plugin use` for +that. Instead, it runs the plugin to get its command signatures, and then edits the plugin registry file (by default, `$nu.plugin-path`). 
The changes will be diff --git a/crates/nu-plugin/src/plugin/mod.rs b/crates/nu-plugin/src/plugin/mod.rs index 0ec170f4cd..30ed196dc6 100644 --- a/crates/nu-plugin/src/plugin/mod.rs +++ b/crates/nu-plugin/src/plugin/mod.rs @@ -260,7 +260,8 @@ pub fn serve_plugin(plugin: &impl Plugin, encoder: impl PluginEncoder + 'static) } } else { eprintln!( - "{}: This plugin must be run from within Nushell.", + "{}: This plugin must be run from within Nushell. See `plugin add --help` for details \ + on how to use plugins.", env::current_exe() .map(|path| path.display().to_string()) .unwrap_or_else(|_| "plugin".into()) From 3b26c08dab31b98658e0836dd113d1f7af6c8d29 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Wed, 8 May 2024 11:50:58 +0000 Subject: [PATCH 006/137] Refactor `parse` command (#12791) # Description - Switches the `excess` in the `ParserStream` and `ParseStreamerExternal` types from a `Vec` to a `VecDeque` - Removes unnecessary clones to `stream_helper` - Other simplifications and loop restructuring - Merges the `ParseStreamer` and `ParseStreamerExternal` types into a common `ParseIter` - `parse` now streams for list values --- crates/nu-command/src/strings/parse.rs | 356 ++++++++++--------------- 1 file changed, 136 insertions(+), 220 deletions(-) diff --git a/crates/nu-command/src/strings/parse.rs b/crates/nu-command/src/strings/parse.rs index 8f6d35b0b3..51067a16a2 100644 --- a/crates/nu-command/src/strings/parse.rs +++ b/crates/nu-command/src/strings/parse.rs @@ -1,9 +1,9 @@ -use fancy_regex::Regex; +use fancy_regex::{Captures, Regex}; use nu_engine::command_prelude::*; -use nu_protocol::{ListStream, ValueIterator}; -use std::sync::{ - atomic::{AtomicBool, Ordering}, - Arc, +use nu_protocol::ListStream; +use std::{ + collections::VecDeque, + sync::{atomic::AtomicBool, Arc}, }; #[derive(Clone)] @@ -119,7 +119,6 @@ fn operate( let head = call.head; let pattern: Spanned = call.req(engine_state, stack, 0)?; let regex: bool = call.has_flag(engine_state, stack, "regex")?; - let ctrlc = engine_state.ctrlc.clone(); let pattern_item = pattern.item; let pattern_span = pattern.span; @@ -130,7 +129,7 @@ fn operate( build_regex(&pattern_item, pattern_span)? }; - let regex_pattern = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError { + let regex = Regex::new(&item_to_parse).map_err(|e| ShellError::GenericError { error: "Error with regular expression".into(), msg: e.to_string(), span: Some(pattern_span), @@ -138,92 +137,108 @@ fn operate( inner: vec![], })?; - let columns = column_names(®ex_pattern); + let columns = regex + .capture_names() + .skip(1) + .enumerate() + .map(|(i, name)| { + name.map(String::from) + .unwrap_or_else(|| format!("capture{i}")) + }) + .collect::>(); + + let ctrlc = engine_state.ctrlc.clone(); match input { PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::Value(..) => { - let mut parsed: Vec = Vec::new(); + PipelineData::Value(value, ..) => match value { + Value::String { val, .. 
} => { + let captures = regex + .captures_iter(&val) + .map(|captures| captures_to_value(captures, &columns, head)) + .collect::>()?; - for v in input { - let v_span = v.span(); - match v.coerce_into_string() { - Ok(s) => { - let results = regex_pattern.captures_iter(&s); - - for c in results { - let captures = match c { - Ok(c) => c, - Err(e) => { - return Err(ShellError::GenericError { - error: "Error with regular expression captures".into(), - msg: e.to_string(), - span: None, - help: None, - inner: vec![], - }) - } - }; - - let record = columns - .iter() - .zip(captures.iter().skip(1)) - .map(|(column_name, cap)| { - let cap_string = cap.map(|v| v.as_str()).unwrap_or(""); - (column_name.clone(), Value::string(cap_string, v_span)) - }) - .collect(); - - parsed.push(Value::record(record, head)); - } - } - Err(_) => { - return Err(ShellError::PipelineMismatch { - exp_input_type: "string".into(), - dst_span: head, - src_span: v_span, - }) - } - } + Ok(Value::list(captures, head).into_pipeline_data()) } + Value::List { vals, .. } => { + let iter = vals.into_iter().map(move |val| { + let span = val.span(); + val.into_string().map_err(|_| ShellError::PipelineMismatch { + exp_input_type: "string".into(), + dst_span: head, + src_span: span, + }) + }); - Ok(ListStream::new(parsed.into_iter(), head, ctrlc).into()) - } + let iter = ParseIter { + captures: VecDeque::new(), + regex, + columns, + iter, + span: head, + ctrlc, + }; + + Ok(ListStream::new(iter, head, None).into()) + } + value => Err(ShellError::PipelineMismatch { + exp_input_type: "string".into(), + dst_span: head, + src_span: value.span(), + }), + }, PipelineData::ListStream(stream, ..) => Ok(stream - .modify(|stream| ParseStreamer { - span: head, - excess: Vec::new(), - regex: regex_pattern, - columns, - stream, - ctrlc, + .modify(|stream| { + let iter = stream.map(move |val| { + let span = val.span(); + val.into_string().map_err(|_| ShellError::PipelineMismatch { + exp_input_type: "string".into(), + dst_span: head, + src_span: span, + }) + }); + + ParseIter { + captures: VecDeque::new(), + regex, + columns, + iter, + span: head, + ctrlc, + } }) .into()), - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty), - PipelineData::ExternalStream { stdout: Some(stream), .. - } => Ok(ListStream::new( - ParseStreamerExternal { - span: head, - excess: Vec::new(), - regex: regex_pattern, + } => { + // Collect all `stream` chunks into a single `chunk` to be able to deal with matches that + // extend across chunk boundaries. + // This is a stop-gap solution until the `regex` crate supports streaming or an alternative + // solution is found. 
+ // See https://github.com/nushell/nushell/issues/9795 + let str = stream.into_string()?.item; + + // let iter = stream.lines(); + + let iter = ParseIter { + captures: VecDeque::new(), + regex, columns, - stream: stream.stream, - }, - head, - ctrlc, - ) - .into()), + iter: std::iter::once(Ok(str)), + span: head, + ctrlc, + }; + + Ok(ListStream::new(iter, head, None).into()) + } } } fn build_regex(input: &str, span: Span) -> Result { let mut output = "(?s)\\A".to_string(); - //let mut loop_input = input; let mut loop_input = input.chars().peekable(); loop { let mut before = String::new(); @@ -274,172 +289,73 @@ fn build_regex(input: &str, span: Span) -> Result { Ok(output) } -fn column_names(regex: &Regex) -> Vec { - regex - .capture_names() - .enumerate() - .skip(1) - .map(|(i, name)| { - name.map(String::from) - .unwrap_or_else(|| format!("capture{}", i - 1)) - }) - .collect() -} - -pub struct ParseStreamer { - span: Span, - excess: Vec, +struct ParseIter>> { + captures: VecDeque, regex: Regex, columns: Vec, - stream: ValueIterator, + iter: I, + span: Span, ctrlc: Option>, } -impl Iterator for ParseStreamer { - type Item = Value; - fn next(&mut self) -> Option { - if !self.excess.is_empty() { - return Some(self.excess.remove(0)); +impl>> ParseIter { + fn populate_captures(&mut self, str: &str) -> Result<(), ShellError> { + for captures in self.regex.captures_iter(str) { + self.captures + .push_back(captures_to_value(captures, &self.columns, self.span)?); } + Ok(()) + } +} +impl>> Iterator for ParseIter { + type Item = Value; + + fn next(&mut self) -> Option { loop { - if let Some(ctrlc) = &self.ctrlc { - if ctrlc.load(Ordering::SeqCst) { - break None; - } + if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { + return None; } - let v = self.stream.next()?; - let span = v.span(); + if let Some(val) = self.captures.pop_front() { + return Some(val); + } - let Ok(s) = v.coerce_into_string() else { - return Some(Value::error( - ShellError::PipelineMismatch { - exp_input_type: "string".into(), - dst_span: self.span, - src_span: span, - }, - span, - )); - }; + let result = self + .iter + .next()? + .and_then(|str| self.populate_captures(&str)); - let parsed = stream_helper( - self.regex.clone(), - span, - s, - self.columns.clone(), - &mut self.excess, - ); - - if parsed.is_none() { - continue; - }; - - return parsed; + if let Err(err) = result { + return Some(Value::error(err, self.span)); + } } } } -pub struct ParseStreamerExternal { +fn captures_to_value( + captures: Result, + columns: &[String], span: Span, - excess: Vec, - regex: Regex, - columns: Vec, - stream: Box, ShellError>> + Send + 'static>, -} +) -> Result { + let captures = captures.map_err(|err| ShellError::GenericError { + error: "Error with regular expression captures".into(), + msg: err.to_string(), + span: Some(span), + help: None, + inner: vec![], + })?; -impl Iterator for ParseStreamerExternal { - type Item = Value; - fn next(&mut self) -> Option { - if !self.excess.is_empty() { - return Some(self.excess.remove(0)); - } + let record = columns + .iter() + .zip(captures.iter().skip(1)) + .map(|(column, match_)| { + let match_str = match_.map(|m| m.as_str()).unwrap_or(""); + (column.clone(), Value::string(match_str, span)) + }) + .collect(); - let mut chunk = self.stream.next(); - - // Collect all `stream` chunks into a single `chunk` to be able to deal with matches that - // extend across chunk boundaries. - // This is a stop-gap solution until the `regex` crate supports streaming or an alternative - // solution is found. 
- // See https://github.com/nushell/nushell/issues/9795 - while let Some(Ok(chunks)) = &mut chunk { - match self.stream.next() { - Some(Ok(mut next_chunk)) => chunks.append(&mut next_chunk), - error @ Some(Err(_)) => chunk = error, - None => break, - } - } - - let chunk = match chunk { - Some(Ok(chunk)) => chunk, - Some(Err(err)) => return Some(Value::error(err, self.span)), - _ => return None, - }; - - let Ok(chunk) = String::from_utf8(chunk) else { - return Some(Value::error( - ShellError::PipelineMismatch { - exp_input_type: "string".into(), - dst_span: self.span, - src_span: self.span, - }, - self.span, - )); - }; - - stream_helper( - self.regex.clone(), - self.span, - chunk, - self.columns.clone(), - &mut self.excess, - ) - } -} - -fn stream_helper( - regex: Regex, - span: Span, - s: String, - columns: Vec, - excess: &mut Vec, -) -> Option { - let results = regex.captures_iter(&s); - - for c in results { - let captures = match c { - Ok(c) => c, - Err(e) => { - return Some(Value::error( - ShellError::GenericError { - error: "Error with regular expression captures".into(), - msg: e.to_string(), - span: Some(span), - help: Some(e.to_string()), - inner: vec![], - }, - span, - )) - } - }; - - let record = columns - .iter() - .zip(captures.iter().skip(1)) - .map(|(column_name, cap)| { - let cap_string = cap.map(|v| v.as_str()).unwrap_or(""); - (column_name.clone(), Value::string(cap_string, span)) - }) - .collect(); - - excess.push(Value::record(record, span)); - } - - if !excess.is_empty() { - Some(excess.remove(0)) - } else { - None - } + Ok(Value::record(record, span)) } #[cfg(test)] From 5466da3b52352ba67d98504cfc186db42eea4543 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Wed, 8 May 2024 14:34:04 -0400 Subject: [PATCH 007/137] cleanup osc calls for shell_integration (#12810) # Description This PR is a continuation of #12629 and meant to address [Reilly's stated issue](https://github.com/nushell/nushell/pull/12629#issuecomment-2099660609). With this PR, nushell should work more consistently with WezTerm on Windows. However, that means continued scrolling with typing if osc133 is enabled. If it's possible to run WezTerm inside of vscode, then having osc633 enabled will also cause the display to scroll with every character typed. I think the cause of this is that reedline paints the entire prompt on each character typed. We need to figure out how to fix that, but that's in reedline. For my purposes, I keep osc133 and osc633 set to true and don't use WezTerm on Windows. Thanks @rgwood for reporting the issue. I found several logic errors. It's often good to come back to PRs and look at them with fresh eyes. I think this is pretty close to logically correct now. However, I'm approaching burn out on ansi escape codes so i could've missed something. Kudos to [escape-artist](https://github.com/rgwood/escape-artist) for helping me debug an ansi escape codes that are actually being sent to the terminal. It was an invaluable tool. 
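For clarity, the selection order the code settles on is roughly the following (a sketch only; the flag and function names are shorthand, and the authoritative logic lives in `prompt.rs`, `prompt_update.rs`, and `repl.rs` below):

```rust
// Priority for shell-integration markers around the prompt and command output.
fn marker_set(osc633: bool, osc133: bool, term_program_is_vscode: bool) -> &'static str {
    if osc633 && term_program_is_vscode {
        "OSC 633 (VSCode) markers" // use the VSCode-specific sequences
    } else if osc133 {
        "OSC 133 markers" // fall back to the generic FinalTerm-style sequences
    } else {
        "no markers" // shell integration disabled
    }
}
```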
# User-Facing Changes # Tests + Formatting # After Submitting --- crates/nu-cli/src/prompt.rs | 6 ++- crates/nu-cli/src/prompt_update.rs | 56 ++++++++-------------- crates/nu-cli/src/repl.rs | 76 +++++++++++++++++++++--------- 3 files changed, 79 insertions(+), 59 deletions(-) diff --git a/crates/nu-cli/src/prompt.rs b/crates/nu-cli/src/prompt.rs index a2045a201c..744640b76f 100644 --- a/crates/nu-cli/src/prompt.rs +++ b/crates/nu-cli/src/prompt.rs @@ -129,9 +129,11 @@ impl Prompt for NushellPrompt { { // We're in vscode and we have osc633 enabled format!("{VSCODE_PRE_PROMPT_MARKER}{prompt}{VSCODE_POST_PROMPT_MARKER}").into() - } else { - // If we're in VSCode but we don't find the env var, just return the regular markers + } else if self.shell_integration_osc133 { + // If we're in VSCode but we don't find the env var, but we have osc133 set, then use it format!("{PRE_PROMPT_MARKER}{prompt}{POST_PROMPT_MARKER}").into() + } else { + prompt.into() } } else if self.shell_integration_osc133 { format!("{PRE_PROMPT_MARKER}{prompt}{POST_PROMPT_MARKER}").into() diff --git a/crates/nu-cli/src/prompt_update.rs b/crates/nu-cli/src/prompt_update.rs index 0c5641378b..5fe2485ca8 100644 --- a/crates/nu-cli/src/prompt_update.rs +++ b/crates/nu-cli/src/prompt_update.rs @@ -108,50 +108,34 @@ pub(crate) fn update_prompt( stack: &mut Stack, nu_prompt: &mut NushellPrompt, ) { - let left_prompt_string = get_prompt_string(PROMPT_COMMAND, config, engine_state, stack); + let configured_left_prompt_string = + match get_prompt_string(PROMPT_COMMAND, config, engine_state, stack) { + Some(s) => s, + None => "".to_string(), + }; // Now that we have the prompt string lets ansify it. // <133 A><133 B><133 C> - let left_prompt_string_133 = if config.shell_integration_osc133 { - if let Some(prompt_string) = left_prompt_string.clone() { + let left_prompt_string = if config.shell_integration_osc633 { + if stack.get_env_var(engine_state, "TERM_PROGRAM") == Some(Value::test_string("vscode")) { + // We're in vscode and we have osc633 enabled Some(format!( - "{PRE_PROMPT_MARKER}{prompt_string}{POST_PROMPT_MARKER}" + "{VSCODE_PRE_PROMPT_MARKER}{configured_left_prompt_string}{VSCODE_POST_PROMPT_MARKER}" + )) + } else if config.shell_integration_osc133 { + // If we're in VSCode but we don't find the env var, but we have osc133 set, then use it + Some(format!( + "{PRE_PROMPT_MARKER}{configured_left_prompt_string}{POST_PROMPT_MARKER}" )) } else { - left_prompt_string.clone() + configured_left_prompt_string.into() } + } else if config.shell_integration_osc133 { + Some(format!( + "{PRE_PROMPT_MARKER}{configured_left_prompt_string}{POST_PROMPT_MARKER}" + )) } else { - left_prompt_string.clone() - }; - - let left_prompt_string_633 = if config.shell_integration_osc633 { - if let Some(prompt_string) = left_prompt_string.clone() { - if stack.get_env_var(engine_state, "TERM_PROGRAM") == Some(Value::test_string("vscode")) - { - // If the user enabled osc633 and we're in vscode, use the vscode markers - Some(format!( - "{VSCODE_PRE_PROMPT_MARKER}{prompt_string}{VSCODE_POST_PROMPT_MARKER}" - )) - } else { - // otherwise, use the regular osc133 markers - Some(format!( - "{PRE_PROMPT_MARKER}{prompt_string}{POST_PROMPT_MARKER}" - )) - } - } else { - left_prompt_string.clone() - } - } else { - left_prompt_string.clone() - }; - - let left_prompt_string = match (left_prompt_string_133, left_prompt_string_633) { - (None, None) => left_prompt_string, - (None, Some(l633)) => Some(l633), - (Some(l133), None) => Some(l133), - // If both are set, it means 
we're in vscode, so use the vscode markers - // and even if we're not actually in vscode atm, the regular 133 markers are used - (Some(_l133), Some(l633)) => Some(l633), + configured_left_prompt_string.into() }; let right_prompt_string = get_prompt_string(PROMPT_COMMAND_RIGHT, config, engine_state, stack); diff --git a/crates/nu-cli/src/repl.rs b/crates/nu-cli/src/repl.rs index 30cccebbde..338d924a69 100644 --- a/crates/nu-cli/src/repl.rs +++ b/crates/nu-cli/src/repl.rs @@ -620,7 +620,7 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { column!(), use_color, ); - } else { + } else if shell_integration_osc133 { start_time = Instant::now(); run_ansi_sequence(PRE_EXECUTION_MARKER); @@ -660,9 +660,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { run_finaliziation_ansi_sequence( &stack, engine_state, + use_color, shell_integration_osc633, shell_integration_osc133, - use_color, ); } ReplOperation::RunCommand(cmd) => { @@ -679,9 +679,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { run_finaliziation_ansi_sequence( &stack, engine_state, + use_color, shell_integration_osc633, shell_integration_osc133, - use_color, ); } // as the name implies, we do nothing in this case @@ -731,9 +731,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { run_finaliziation_ansi_sequence( &stack, engine_state, + use_color, shell_integration_osc633, shell_integration_osc133, - use_color, ); } Ok(Signal::CtrlD) => { @@ -742,9 +742,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { run_finaliziation_ansi_sequence( &stack, engine_state, + use_color, shell_integration_osc633, shell_integration_osc133, - use_color, ); println!(); @@ -763,9 +763,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { run_finaliziation_ansi_sequence( &stack, engine_state, + use_color, shell_integration_osc633, shell_integration_osc133, - use_color, ); } } @@ -1298,27 +1298,46 @@ fn map_nucursorshape_to_cursorshape(shape: NuCursorShape) -> Option String { +fn get_command_finished_marker( + stack: &Stack, + engine_state: &EngineState, + shell_integration_osc633: bool, + shell_integration_osc133: bool, +) -> String { let exit_code = stack .get_env_var(engine_state, "LAST_EXIT_CODE") .and_then(|e| e.as_i64().ok()); - if vscode { - // format!("\x1b]633;D;{}\x1b\\", exit_code.unwrap_or(0)) - format!( - "{}{}{}", - VSCODE_POST_EXECUTION_MARKER_PREFIX, - exit_code.unwrap_or(0), - VSCODE_POST_EXECUTION_MARKER_SUFFIX - ) - } else { - // format!("\x1b]133;D;{}\x1b\\", exit_code.unwrap_or(0)) + if shell_integration_osc633 { + if stack.get_env_var(engine_state, "TERM_PROGRAM") == Some(Value::test_string("vscode")) { + // We're in vscode and we have osc633 enabled + format!( + "{}{}{}", + VSCODE_POST_EXECUTION_MARKER_PREFIX, + exit_code.unwrap_or(0), + VSCODE_POST_EXECUTION_MARKER_SUFFIX + ) + } else if shell_integration_osc133 { + // If we're in VSCode but we don't find the env var, just return the regular markers + format!( + "{}{}{}", + POST_EXECUTION_MARKER_PREFIX, + exit_code.unwrap_or(0), + POST_EXECUTION_MARKER_SUFFIX + ) + } else { + // We're not in vscode, so we don't need to do anything special + "\x1b[0m".to_string() + } + } else if shell_integration_osc133 { format!( "{}{}{}", POST_EXECUTION_MARKER_PREFIX, exit_code.unwrap_or(0), POST_EXECUTION_MARKER_SUFFIX ) + } else { + "\x1b[0m".to_string() } } @@ -1342,7 +1361,12 @@ fn run_finaliziation_ansi_sequence( if stack.get_env_var(engine_state, "TERM_PROGRAM") == 
Some(Value::test_string("vscode")) { let start_time = Instant::now(); - run_ansi_sequence(&get_command_finished_marker(stack, engine_state, true)); + run_ansi_sequence(&get_command_finished_marker( + stack, + engine_state, + shell_integration_osc633, + shell_integration_osc133, + )); perf( "post_execute_marker (633;D) ansi escape sequences", @@ -1352,10 +1376,15 @@ fn run_finaliziation_ansi_sequence( column!(), use_color, ); - } else { + } else if shell_integration_osc133 { let start_time = Instant::now(); - run_ansi_sequence(&get_command_finished_marker(stack, engine_state, false)); + run_ansi_sequence(&get_command_finished_marker( + stack, + engine_state, + shell_integration_osc633, + shell_integration_osc133, + )); perf( "post_execute_marker (133;D) ansi escape sequences", @@ -1369,7 +1398,12 @@ fn run_finaliziation_ansi_sequence( } else if shell_integration_osc133 { let start_time = Instant::now(); - run_ansi_sequence(&get_command_finished_marker(stack, engine_state, false)); + run_ansi_sequence(&get_command_finished_marker( + stack, + engine_state, + shell_integration_osc633, + shell_integration_osc133, + )); perf( "post_execute_marker (133;D) ansi escape sequences", From 92831d7efcd0586a85966560beb0c538f4a0fc36 Mon Sep 17 00:00:00 2001 From: Andy Gayton Date: Wed, 8 May 2024 15:45:44 -0400 Subject: [PATCH 008/137] feat: add an echo command to nu_plugin_example (#12754) # Description This PR adds a new `echo` command to the `nu_plugin_example` plugin that simply [streams all of its input to its output](https://github.com/nushell/nushell/pull/12754/files#diff-de9fcf086b8c373039dadcc2bcb664c6014c0b2af8568eab68c0b6666ac5ccceR47). ``` : "hi" | example echo hi ``` The motivation for adding it is to have a convenient command to exercise interactivity on slow pipelines. 
I'll follow up on that front with [another PR](https://github.com/cablehead/nushell/pull/1/files) # Tests + Formatting https://github.com/nushell/nushell/pull/12754/files#diff-de9fcf086b8c373039dadcc2bcb664c6014c0b2af8568eab68c0b6666ac5ccceR51-R55 --- crates/nu_plugin_example/src/commands/echo.rs | 55 +++++++++++++++++++ crates/nu_plugin_example/src/commands/mod.rs | 2 + crates/nu_plugin_example/src/lib.rs | 1 + 3 files changed, 58 insertions(+) create mode 100644 crates/nu_plugin_example/src/commands/echo.rs diff --git a/crates/nu_plugin_example/src/commands/echo.rs b/crates/nu_plugin_example/src/commands/echo.rs new file mode 100644 index 0000000000..e60ccf8107 --- /dev/null +++ b/crates/nu_plugin_example/src/commands/echo.rs @@ -0,0 +1,55 @@ +use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; +use nu_protocol::{Category, Example, LabeledError, PipelineData, Signature, Type, Value}; + +use crate::ExamplePlugin; + +/// ` | example echo` +pub struct Echo; + +impl PluginCommand for Echo { + type Plugin = ExamplePlugin; + + fn name(&self) -> &str { + "example echo" + } + + fn usage(&self) -> &str { + "Example stream consumer that outputs the received input" + } + + fn signature(&self) -> Signature { + Signature::build(self.name()) + .input_output_types(vec![(Type::Any, Type::Any)]) + .category(Category::Experimental) + } + + fn search_terms(&self) -> Vec<&str> { + vec!["example"] + } + + fn examples(&self) -> Vec { + vec![Example { + example: "example seq 1 5 | example echo", + description: "echos the values from 1 to 5", + result: Some(Value::test_list( + (1..=5).map(Value::test_int).collect::>(), + )), + }] + } + + fn run( + &self, + _plugin: &ExamplePlugin, + _engine: &EngineInterface, + _call: &EvaluatedCall, + input: PipelineData, + ) -> Result { + Ok(input) + } +} + +#[test] +fn test_examples() -> Result<(), nu_protocol::ShellError> { + use nu_plugin_test_support::PluginTest; + PluginTest::new("example", ExamplePlugin.into())?.test_command_examples(&Echo) +} diff --git a/crates/nu_plugin_example/src/commands/mod.rs b/crates/nu_plugin_example/src/commands/mod.rs index 2d7ef4274a..9425dad4ca 100644 --- a/crates/nu_plugin_example/src/commands/mod.rs +++ b/crates/nu_plugin_example/src/commands/mod.rs @@ -25,12 +25,14 @@ pub use view_span::ViewSpan; // Stream demos mod collect_external; +mod echo; mod for_each; mod generate; mod seq; mod sum; pub use collect_external::CollectExternal; +pub use echo::Echo; pub use for_each::ForEach; pub use generate::Generate; pub use seq::Seq; diff --git a/crates/nu_plugin_example/src/lib.rs b/crates/nu_plugin_example/src/lib.rs index 0c394c78aa..e87c31229d 100644 --- a/crates/nu_plugin_example/src/lib.rs +++ b/crates/nu_plugin_example/src/lib.rs @@ -25,6 +25,7 @@ impl Plugin for ExamplePlugin { Box::new(DisableGc), // Stream demos Box::new(CollectExternal), + Box::new(Echo), Box::new(ForEach), Box::new(Generate), Box::new(Seq), From ba6f38510cd231e5ae4808b1a56cea6770f1e41b Mon Sep 17 00:00:00 2001 From: Stefan Holderbach Date: Thu, 9 May 2024 02:10:58 +0200 Subject: [PATCH 009/137] Shrink `Value` by boxing `Range`/`Closure` (#12784) # Description On 64-bit platforms the current size of `Value` is 56 bytes. The limiting variants were `Closure` and `Range`. Boxing the two reduces the size of Value to 48 bytes. This is the minimal size possible with our current 16-byte `Span` and any 24-byte `Vec` container which we use in several variants. 
(Note the extra full 8-bytes necessary for the discriminant or other smaller values due to the 8-byte alignment of `usize`) This is leads to a size reduction of ~15% for `Value` and should overall be beneficial as both `Range` and `Closure` are rarely used compared to the primitive types or even our general container types. # User-Facing Changes Less memory used, potential runtime benefits. (Too late in the evening to run the benchmarks myself right now) --- crates/nu-cli/src/prompt_update.rs | 2 +- crates/nu-cmd-lang/src/example_support.rs | 2 +- crates/nu-color-config/src/style_computer.rs | 5 ++++- crates/nu-command/src/filters/group_by.rs | 2 +- crates/nu-command/src/filters/insert.rs | 10 +++++----- crates/nu-command/src/filters/update.rs | 8 ++++---- crates/nu-command/src/filters/upsert.rs | 10 +++++----- crates/nu-command/src/filters/zip.rs | 2 +- crates/nu-plugin-engine/src/context.rs | 2 +- crates/nu-protocol/src/pipeline_data/mod.rs | 4 ++-- crates/nu-protocol/src/value/from_value.rs | 8 ++++---- crates/nu-protocol/src/value/mod.rs | 18 +++++++++--------- crates/nuon/src/to.rs | 2 +- 13 files changed, 39 insertions(+), 36 deletions(-) diff --git a/crates/nu-cli/src/prompt_update.rs b/crates/nu-cli/src/prompt_update.rs index 5fe2485ca8..827bff0e5a 100644 --- a/crates/nu-cli/src/prompt_update.rs +++ b/crates/nu-cli/src/prompt_update.rs @@ -65,7 +65,7 @@ fn get_prompt_string( .get_env_var(engine_state, prompt) .and_then(|v| match v { Value::Closure { val, .. } => { - let result = ClosureEvalOnce::new(engine_state, stack, val) + let result = ClosureEvalOnce::new(engine_state, stack, *val) .run_with_input(PipelineData::Empty); trace!( diff --git a/crates/nu-cmd-lang/src/example_support.rs b/crates/nu-cmd-lang/src/example_support.rs index 42dcd447b7..860572f349 100644 --- a/crates/nu-cmd-lang/src/example_support.rs +++ b/crates/nu-cmd-lang/src/example_support.rs @@ -223,7 +223,7 @@ impl<'a> std::fmt::Debug for DebuggableValue<'a> { Value::Date { val, .. } => { write!(f, "Date({:?})", val) } - Value::Range { val, .. } => match val { + Value::Range { val, .. } => match **val { Range::IntRange(range) => match range.end() { Bound::Included(end) => write!( f, diff --git a/crates/nu-color-config/src/style_computer.rs b/crates/nu-color-config/src/style_computer.rs index cd2454f011..2293439183 100644 --- a/crates/nu-color-config/src/style_computer.rs +++ b/crates/nu-color-config/src/style_computer.rs @@ -146,7 +146,10 @@ impl<'a> StyleComputer<'a> { let span = value.span(); match value { Value::Closure { val, .. } => { - map.insert(key.to_string(), ComputableStyle::Closure(val.clone(), span)); + map.insert( + key.to_string(), + ComputableStyle::Closure(*val.clone(), span), + ); } Value::Record { .. } => { map.insert( diff --git a/crates/nu-command/src/filters/group_by.rs b/crates/nu-command/src/filters/group_by.rs index acd5ae5b1a..24559c1eca 100644 --- a/crates/nu-command/src/filters/group_by.rs +++ b/crates/nu-command/src/filters/group_by.rs @@ -139,7 +139,7 @@ pub fn group_by( match grouper { Value::CellPath { val, .. } => group_cell_path(val, values)?, Value::Closure { val, .. } => { - group_closure(values, span, val, engine_state, stack)? + group_closure(values, span, *val, engine_state, stack)? 
} _ => { return Err(ShellError::TypeMismatch { diff --git a/crates/nu-command/src/filters/insert.rs b/crates/nu-command/src/filters/insert.rs index c87a2a78b9..d9fb165a16 100644 --- a/crates/nu-command/src/filters/insert.rs +++ b/crates/nu-command/src/filters/insert.rs @@ -133,7 +133,7 @@ fn insert( if let Value::Closure { val, .. } = replacement { match (cell_path.members.first(), &mut value) { (Some(PathMember::String { .. }), Value::List { vals, .. }) => { - let mut closure = ClosureEval::new(engine_state, stack, val); + let mut closure = ClosureEval::new(engine_state, stack, *val); for val in vals { insert_value_by_closure( val, @@ -147,7 +147,7 @@ fn insert( (first, _) => { insert_single_value_by_closure( &mut value, - ClosureEvalOnce::new(engine_state, stack, val), + ClosureEvalOnce::new(engine_state, stack, *val), head, &cell_path.members, matches!(first, Some(PathMember::Int { .. })), @@ -188,7 +188,7 @@ fn insert( let value = stream.next(); let end_of_stream = value.is_none(); let value = value.unwrap_or(Value::nothing(head)); - let new_value = ClosureEvalOnce::new(engine_state, stack, val) + let new_value = ClosureEvalOnce::new(engine_state, stack, *val) .run_with_value(value.clone())? .into_value(head); @@ -203,7 +203,7 @@ fn insert( if let Value::Closure { val, .. } = replacement { insert_single_value_by_closure( &mut value, - ClosureEvalOnce::new(engine_state, stack, val), + ClosureEvalOnce::new(engine_state, stack, *val), head, path, true, @@ -224,7 +224,7 @@ fn insert( .chain(stream) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) } else if let Value::Closure { val, .. } = replacement { - let mut closure = ClosureEval::new(engine_state, stack, val); + let mut closure = ClosureEval::new(engine_state, stack, *val); let stream = stream.map(move |mut value| { let err = insert_value_by_closure( &mut value, diff --git a/crates/nu-command/src/filters/update.rs b/crates/nu-command/src/filters/update.rs index 2cea7deead..d963e39995 100644 --- a/crates/nu-command/src/filters/update.rs +++ b/crates/nu-command/src/filters/update.rs @@ -117,7 +117,7 @@ fn update( if let Value::Closure { val, .. } = replacement { match (cell_path.members.first(), &mut value) { (Some(PathMember::String { .. }), Value::List { vals, .. }) => { - let mut closure = ClosureEval::new(engine_state, stack, val); + let mut closure = ClosureEval::new(engine_state, stack, *val); for val in vals { update_value_by_closure( val, @@ -131,7 +131,7 @@ fn update( (first, _) => { update_single_value_by_closure( &mut value, - ClosureEvalOnce::new(engine_state, stack, val), + ClosureEvalOnce::new(engine_state, stack, *val), head, &cell_path.members, matches!(first, Some(PathMember::Int { .. })), @@ -175,7 +175,7 @@ fn update( if let Value::Closure { val, .. } = replacement { update_single_value_by_closure( value, - ClosureEvalOnce::new(engine_state, stack, val), + ClosureEvalOnce::new(engine_state, stack, *val), head, path, true, @@ -189,7 +189,7 @@ fn update( .chain(stream) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) } else if let Value::Closure { val, .. 
} = replacement { - let mut closure = ClosureEval::new(engine_state, stack, val); + let mut closure = ClosureEval::new(engine_state, stack, *val); let stream = stream.map(move |mut value| { let err = update_value_by_closure( &mut value, diff --git a/crates/nu-command/src/filters/upsert.rs b/crates/nu-command/src/filters/upsert.rs index b7b4a782f2..6b62b1d7bc 100644 --- a/crates/nu-command/src/filters/upsert.rs +++ b/crates/nu-command/src/filters/upsert.rs @@ -163,7 +163,7 @@ fn upsert( if let Value::Closure { val, .. } = replacement { match (cell_path.members.first(), &mut value) { (Some(PathMember::String { .. }), Value::List { vals, .. }) => { - let mut closure = ClosureEval::new(engine_state, stack, val); + let mut closure = ClosureEval::new(engine_state, stack, *val); for val in vals { upsert_value_by_closure( val, @@ -177,7 +177,7 @@ fn upsert( (first, _) => { upsert_single_value_by_closure( &mut value, - ClosureEvalOnce::new(engine_state, stack, val), + ClosureEvalOnce::new(engine_state, stack, *val), head, &cell_path.members, matches!(first, Some(PathMember::Int { .. })), @@ -216,7 +216,7 @@ fn upsert( let value = if path.is_empty() { let value = stream.next().unwrap_or(Value::nothing(head)); if let Value::Closure { val, .. } = replacement { - ClosureEvalOnce::new(engine_state, stack, val) + ClosureEvalOnce::new(engine_state, stack, *val) .run_with_value(value)? .into_value(head) } else { @@ -226,7 +226,7 @@ fn upsert( if let Value::Closure { val, .. } = replacement { upsert_single_value_by_closure( &mut value, - ClosureEvalOnce::new(engine_state, stack, val), + ClosureEvalOnce::new(engine_state, stack, *val), head, path, true, @@ -249,7 +249,7 @@ fn upsert( .chain(stream) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) } else if let Value::Closure { val, .. } = replacement { - let mut closure = ClosureEval::new(engine_state, stack, val); + let mut closure = ClosureEval::new(engine_state, stack, *val); let stream = stream.map(move |mut value| { let err = upsert_value_by_closure( &mut value, diff --git a/crates/nu-command/src/filters/zip.rs b/crates/nu-command/src/filters/zip.rs index f4ee739f50..9d81451ed4 100644 --- a/crates/nu-command/src/filters/zip.rs +++ b/crates/nu-command/src/filters/zip.rs @@ -103,7 +103,7 @@ impl Command for Zip { let metadata = input.metadata(); let other = if let Value::Closure { val, .. } = other { // If a closure was provided, evaluate it and consume its stream output - ClosureEvalOnce::new(engine_state, stack, val).run_with_input(PipelineData::Empty)? + ClosureEvalOnce::new(engine_state, stack, *val).run_with_input(PipelineData::Empty)? } else { other.into_pipeline_data() }; diff --git a/crates/nu-plugin-engine/src/context.rs b/crates/nu-plugin-engine/src/context.rs index 3f77d85477..0fb7b95b4c 100644 --- a/crates/nu-plugin-engine/src/context.rs +++ b/crates/nu-plugin-engine/src/context.rs @@ -106,7 +106,7 @@ impl<'a> PluginExecutionContext for PluginExecutionCommandContext<'a> { let span = value.span(); match value { Value::Closure { val, .. 
} => { - ClosureEvalOnce::new(&self.engine_state, &self.stack, val) + ClosureEvalOnce::new(&self.engine_state, &self.stack, *val) .run_with_input(PipelineData::Empty) .map(|data| data.into_value(span)) .unwrap_or_else(|err| Value::error(err, self.call.head)) diff --git a/crates/nu-protocol/src/pipeline_data/mod.rs b/crates/nu-protocol/src/pipeline_data/mod.rs index 71c667fa70..5b36cf871f 100644 --- a/crates/nu-protocol/src/pipeline_data/mod.rs +++ b/crates/nu-protocol/src/pipeline_data/mod.rs @@ -421,7 +421,7 @@ impl PipelineData { ) .into_iter(), ), - Value::Range { val, .. } => PipelineIteratorInner::ListStream( + Value::Range { ref val, .. } => PipelineIteratorInner::ListStream( ListStream::new(val.into_range_iter(value.span(), None), val_span, None) .into_iter(), ), @@ -801,7 +801,7 @@ impl PipelineData { let span = v.span(); match v { Value::Range { val, .. } => { - match val { + match *val { Range::IntRange(range) => { if range.is_unbounded() { return Err(ShellError::GenericError { diff --git a/crates/nu-protocol/src/value/from_value.rs b/crates/nu-protocol/src/value/from_value.rs index 9fc9d4c8b6..97fb95d482 100644 --- a/crates/nu-protocol/src/value/from_value.rs +++ b/crates/nu-protocol/src/value/from_value.rs @@ -442,7 +442,7 @@ impl FromValue for Spanned> { impl FromValue for Range { fn from_value(v: Value) -> Result { match v { - Value::Range { val, .. } => Ok(val), + Value::Range { val, .. } => Ok(*val), v => Err(ShellError::CantConvert { to_type: "range".into(), from_type: v.get_type().to_string(), @@ -457,7 +457,7 @@ impl FromValue for Spanned { fn from_value(v: Value) -> Result { let span = v.span(); match v { - Value::Range { val, .. } => Ok(Spanned { item: val, span }), + Value::Range { val, .. } => Ok(Spanned { item: *val, span }), v => Err(ShellError::CantConvert { to_type: "range".into(), from_type: v.get_type().to_string(), @@ -552,7 +552,7 @@ impl FromValue for Record { impl FromValue for Closure { fn from_value(v: Value) -> Result { match v { - Value::Closure { val, .. } => Ok(val), + Value::Closure { val, .. } => Ok(*val), v => Err(ShellError::CantConvert { to_type: "Closure".into(), from_type: v.get_type().to_string(), @@ -567,7 +567,7 @@ impl FromValue for Spanned { fn from_value(v: Value) -> Result { let span = v.span(); match v { - Value::Closure { val, .. } => Ok(Spanned { item: val, span }), + Value::Closure { val, .. 
} => Ok(Spanned { item: *val, span }), v => Err(ShellError::CantConvert { to_type: "Closure".into(), from_type: v.get_type().to_string(), diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs index dbfa93b793..f924218b79 100644 --- a/crates/nu-protocol/src/value/mod.rs +++ b/crates/nu-protocol/src/value/mod.rs @@ -86,7 +86,7 @@ pub enum Value { internal_span: Span, }, Range { - val: Range, + val: Box, // note: spans are being refactored out of Value // please use .span() instead of matching this span value #[serde(rename = "span")] @@ -122,7 +122,7 @@ pub enum Value { internal_span: Span, }, Closure { - val: Closure, + val: Box, // note: spans are being refactored out of Value // please use .span() instead of matching this span value #[serde(rename = "span")] @@ -182,7 +182,7 @@ impl Clone for Value { internal_span: *internal_span, }, Value::Range { val, internal_span } => Value::Range { - val: *val, + val: val.clone(), internal_span: *internal_span, }, Value::Float { val, internal_span } => Value::float(*val, *internal_span), @@ -327,7 +327,7 @@ impl Value { /// Returns a reference to the inner [`Range`] value or an error if this `Value` is not a range pub fn as_range(&self) -> Result { if let Value::Range { val, .. } = self { - Ok(*val) + Ok(**val) } else { self.cant_convert_to("range") } @@ -336,7 +336,7 @@ impl Value { /// Unwraps the inner [`Range`] value or returns an error if this `Value` is not a range pub fn into_range(self) -> Result { if let Value::Range { val, .. } = self { - Ok(val) + Ok(*val) } else { self.cant_convert_to("range") } @@ -553,7 +553,7 @@ impl Value { /// Unwraps the inner [`Closure`] value or returns an error if this `Value` is not a closure pub fn into_closure(self) -> Result { if let Value::Closure { val, .. } = self { - Ok(val) + Ok(*val) } else { self.cant_convert_to("closure") } @@ -1012,7 +1012,7 @@ impl Value { }); } } - Value::Range { val, .. } => { + Value::Range { ref val, .. } => { if let Some(item) = val.into_range_iter(current.span(), None).nth(*count) { @@ -1826,7 +1826,7 @@ impl Value { pub fn range(val: Range, span: Span) -> Value { Value::Range { - val, + val: val.into(), internal_span: span, } } @@ -1862,7 +1862,7 @@ impl Value { pub fn closure(val: Closure, span: Span) -> Value { Value::Closure { - val, + val: val.into(), internal_span: span, } } diff --git a/crates/nuon/src/to.rs b/crates/nuon/src/to.rs index 8aeb47a097..57f208dcc0 100644 --- a/crates/nuon/src/to.rs +++ b/crates/nuon/src/to.rs @@ -176,7 +176,7 @@ fn value_to_string( } } Value::Nothing { .. } => Ok("null".to_string()), - Value::Range { val, .. } => match val { + Value::Range { val, .. } => match **val { Range::IntRange(range) => Ok(range.to_string()), Range::FloatRange(range) => { let start = From 948b299e657004bed7af788e5423f32c7ab5526f Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 9 May 2024 02:16:57 +0000 Subject: [PATCH 010/137] Fix/simplify cwd in benchmarks (#12812) # Description The benchmarks currently panic when trying to set the initial CWD. This is because the code that sets the CWD also tries to get the CWD. 
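A self-contained toy reproduction of the ordering problem (plain `std` types only; `env` here is a stand-in for the engine's environment, not the real `EngineState`):

```rust
use std::collections::HashMap;

fn main() {
    let mut env: HashMap<String, String> = HashMap::new();

    // Old flow (simplified): computing the value to store for PWD started by
    // reading PWD from the engine state, which cannot work on a fresh state:
    // let cwd = engine_state.cwd_as_string(None).unwrap(); // <- panicked here

    // New flow: ask the OS directly and seed PWD from that.
    let cwd = std::env::current_dir()
        .expect("benchmarks need a valid current directory")
        .to_string_lossy()
        .into_owned();
    env.insert("PWD".to_string(), cwd);

    println!("PWD seeded as {}", env["PWD"]);
}
```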
--- benches/benchmarks.rs | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index e291eeebcc..9de7cc5758 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -9,10 +9,7 @@ use nu_protocol::{ }; use nu_std::load_standard_library; use nu_utils::{get_default_config, get_default_env}; -use std::{ - path::{Path, PathBuf}, - rc::Rc, -}; +use std::rc::Rc; use std::hint::black_box; @@ -22,34 +19,16 @@ fn load_bench_commands() -> EngineState { nu_command::add_shell_command_context(nu_cmd_lang::create_default_context()) } -fn canonicalize_path(engine_state: &EngineState, path: &Path) -> PathBuf { - let cwd = engine_state.cwd_as_string(None).unwrap(); - - if path.exists() { - match nu_path::canonicalize_with(path, cwd) { - Ok(canon_path) => canon_path, - Err(_) => path.to_owned(), - } - } else { - path.to_owned() - } -} - -fn get_home_path(engine_state: &EngineState) -> PathBuf { - nu_path::home_dir() - .map(|path| canonicalize_path(engine_state, &path)) - .unwrap_or_default() -} - fn setup_engine() -> EngineState { let mut engine_state = load_bench_commands(); - let home_path = get_home_path(&engine_state); + let cwd = std::env::current_dir() + .unwrap() + .into_os_string() + .into_string() + .unwrap(); // parsing config.nu breaks without PWD set, so set a valid path - engine_state.add_env_var( - "PWD".into(), - Value::string(home_path.to_string_lossy(), Span::test_data()), - ); + engine_state.add_env_var("PWD".into(), Value::string(cwd, Span::test_data())); let nu_const = create_nu_constant(&engine_state, Span::unknown()) .expect("Failed to create nushell constant."); From 3b3f48202c10ceb29fea346e6dd76e07cdb80766 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 9 May 2024 05:36:47 +0000 Subject: [PATCH 011/137] Refactor message printing in `rm` (#12799) # Description Changes the iterator in `rm` to be an iterator over `Result, ShellError>` (an optional message or error) instead of an iterator over `Value`. Then, the iterator is consumed and each message is printed. This allows the `PipelineData::print_not_formatted` method to be removed. --- crates/nu-command/src/filesystem/rm.rs | 236 ++++++++++---------- crates/nu-protocol/src/pipeline_data/mod.rs | 26 --- 2 files changed, 116 insertions(+), 146 deletions(-) diff --git a/crates/nu-command/src/filesystem/rm.rs b/crates/nu-command/src/filesystem/rm.rs index 9b9e88b5ff..9696ae0c2f 100644 --- a/crates/nu-command/src/filesystem/rm.rs +++ b/crates/nu-command/src/filesystem/rm.rs @@ -3,7 +3,7 @@ use super::util::{get_rest_for_glob_pattern, try_interaction}; use nu_engine::{command_prelude::*, env::current_dir}; use nu_glob::MatchOptions; use nu_path::expand_path_with; -use nu_protocol::NuGlob; +use nu_protocol::{report_error_new, NuGlob}; #[cfg(unix)] use std::os::unix::prelude::FileTypeExt; use std::{ @@ -118,8 +118,6 @@ fn rm( let interactive = call.has_flag(engine_state, stack, "interactive")?; let interactive_once = call.has_flag(engine_state, stack, "interactive-once")? 
&& !interactive; - let ctrlc = engine_state.ctrlc.clone(); - let mut paths = get_rest_for_glob_pattern(engine_state, stack, call, 0)?; if paths.is_empty() { @@ -341,132 +339,130 @@ fn rm( } } - all_targets - .into_iter() - .map(move |(f, span)| { - let is_empty = || match f.read_dir() { - Ok(mut p) => p.next().is_none(), - Err(_) => false, - }; + let iter = all_targets.into_iter().map(move |(f, span)| { + let is_empty = || match f.read_dir() { + Ok(mut p) => p.next().is_none(), + Err(_) => false, + }; - if let Ok(metadata) = f.symlink_metadata() { - #[cfg(unix)] - let is_socket = metadata.file_type().is_socket(); - #[cfg(unix)] - let is_fifo = metadata.file_type().is_fifo(); + if let Ok(metadata) = f.symlink_metadata() { + #[cfg(unix)] + let is_socket = metadata.file_type().is_socket(); + #[cfg(unix)] + let is_fifo = metadata.file_type().is_fifo(); - #[cfg(not(unix))] - let is_socket = false; - #[cfg(not(unix))] - let is_fifo = false; + #[cfg(not(unix))] + let is_socket = false; + #[cfg(not(unix))] + let is_fifo = false; - if metadata.is_file() - || metadata.file_type().is_symlink() - || recursive - || is_socket - || is_fifo - || is_empty() - { - let (interaction, confirmed) = try_interaction( - interactive, - format!("rm: remove '{}'? ", f.to_string_lossy()), - ); + if metadata.is_file() + || metadata.file_type().is_symlink() + || recursive + || is_socket + || is_fifo + || is_empty() + { + let (interaction, confirmed) = try_interaction( + interactive, + format!("rm: remove '{}'? ", f.to_string_lossy()), + ); - let result = if let Err(e) = interaction { - let e = Error::new(ErrorKind::Other, &*e.to_string()); - Err(e) - } else if interactive && !confirmed { - Ok(()) - } else if TRASH_SUPPORTED && (trash || (rm_always_trash && !permanent)) { - #[cfg(all( - feature = "trash-support", - not(any(target_os = "android", target_os = "ios")) - ))] - { - trash::delete(&f).map_err(|e: trash::Error| { - Error::new( - ErrorKind::Other, - format!("{e:?}\nTry '--permanent' flag"), - ) - }) - } - - // Should not be reachable since we error earlier if - // these options are given on an unsupported platform - #[cfg(any( - not(feature = "trash-support"), - target_os = "android", - target_os = "ios" - ))] - { - unreachable!() - } - } else if metadata.is_symlink() { - // In Windows, symlink pointing to a directory can be removed using - // std::fs::remove_dir instead of std::fs::remove_file. 
- #[cfg(windows)] - { - f.metadata().and_then(|metadata| { - if metadata.is_dir() { - std::fs::remove_dir(&f) - } else { - std::fs::remove_file(&f) - } - }) - } - - #[cfg(not(windows))] - std::fs::remove_file(&f) - } else if metadata.is_file() || is_socket || is_fifo { - std::fs::remove_file(&f) - } else { - std::fs::remove_dir_all(&f) - }; - - if let Err(e) = result { - let msg = format!("Could not delete {:}: {e:}", f.to_string_lossy()); - Value::error(ShellError::RemoveNotPossible { msg, span }, span) - } else if verbose { - let msg = if interactive && !confirmed { - "not deleted" - } else { - "deleted" - }; - let val = format!("{} {:}", msg, f.to_string_lossy()); - Value::string(val, span) - } else { - Value::nothing(span) + let result = if let Err(e) = interaction { + Err(Error::new(ErrorKind::Other, &*e.to_string())) + } else if interactive && !confirmed { + Ok(()) + } else if TRASH_SUPPORTED && (trash || (rm_always_trash && !permanent)) { + #[cfg(all( + feature = "trash-support", + not(any(target_os = "android", target_os = "ios")) + ))] + { + trash::delete(&f).map_err(|e: trash::Error| { + Error::new(ErrorKind::Other, format!("{e:?}\nTry '--permanent' flag")) + }) } + + // Should not be reachable since we error earlier if + // these options are given on an unsupported platform + #[cfg(any( + not(feature = "trash-support"), + target_os = "android", + target_os = "ios" + ))] + { + unreachable!() + } + } else if metadata.is_symlink() { + // In Windows, symlink pointing to a directory can be removed using + // std::fs::remove_dir instead of std::fs::remove_file. + #[cfg(windows)] + { + f.metadata().and_then(|metadata| { + if metadata.is_dir() { + std::fs::remove_dir(&f) + } else { + std::fs::remove_file(&f) + } + }) + } + + #[cfg(not(windows))] + std::fs::remove_file(&f) + } else if metadata.is_file() || is_socket || is_fifo { + std::fs::remove_file(&f) } else { - let error = format!("Cannot remove {:}. try --recursive", f.to_string_lossy()); - Value::error( - ShellError::GenericError { - error, - msg: "cannot remove non-empty directory".into(), - span: Some(span), - help: None, - inner: vec![], - }, - span, - ) + std::fs::remove_dir_all(&f) + }; + + if let Err(e) = result { + let msg = format!("Could not delete {:}: {e:}", f.to_string_lossy()); + Err(ShellError::RemoveNotPossible { msg, span }) + } else if verbose { + let msg = if interactive && !confirmed { + "not deleted" + } else { + "deleted" + }; + Ok(Some(format!("{} {:}", msg, f.to_string_lossy()))) + } else { + Ok(None) } } else { - let error = format!("no such file or directory: {:}", f.to_string_lossy()); - Value::error( - ShellError::GenericError { - error, - msg: "no such file or directory".into(), - span: Some(span), - help: None, - inner: vec![], - }, - span, - ) + let error = format!("Cannot remove {:}. 
try --recursive", f.to_string_lossy()); + Err(ShellError::GenericError { + error, + msg: "cannot remove non-empty directory".into(), + span: Some(span), + help: None, + inner: vec![], + }) } - }) - .filter(|x| !matches!(x.get_type(), Type::Nothing)) - .into_pipeline_data(span, ctrlc) - .print_not_formatted(engine_state, false, true)?; + } else { + let error = format!("no such file or directory: {:}", f.to_string_lossy()); + Err(ShellError::GenericError { + error, + msg: "no such file or directory".into(), + span: Some(span), + help: None, + inner: vec![], + }) + } + }); + + for result in iter { + if nu_utils::ctrl_c::was_pressed(&engine_state.ctrlc) { + return Err(ShellError::InterruptedByUser { + span: Some(call.head), + }); + } + + match result { + Ok(None) => {} + Ok(Some(msg)) => eprintln!("{msg}"), + Err(err) => report_error_new(engine_state, &err), + } + } Ok(PipelineData::empty()) } diff --git a/crates/nu-protocol/src/pipeline_data/mod.rs b/crates/nu-protocol/src/pipeline_data/mod.rs index 5b36cf871f..297eb19c55 100644 --- a/crates/nu-protocol/src/pipeline_data/mod.rs +++ b/crates/nu-protocol/src/pipeline_data/mod.rs @@ -877,32 +877,6 @@ impl PipelineData { Ok(0) } - /// Consume and print self data immediately. - /// - /// Unlike [`.print()`] does not call `table` to format data and just prints it - /// one element on a line - /// * `no_newline` controls if we need to attach newline character to output. - /// * `to_stderr` controls if data is output to stderr, when the value is false, the data is output to stdout. - pub fn print_not_formatted( - self, - engine_state: &EngineState, - no_newline: bool, - to_stderr: bool, - ) -> Result { - if let PipelineData::ExternalStream { - stdout: stream, - stderr: stderr_stream, - exit_code, - .. - } = self - { - print_if_stream(stream, stderr_stream, to_stderr, exit_code) - } else { - let config = engine_state.get_config(); - self.write_all_and_flush(engine_state, config, no_newline, to_stderr) - } - } - fn write_all_and_flush( self, engine_state: &EngineState, From 7271ad7909445ad714063b0dedd4b6719b0a4f59 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 9 May 2024 05:38:24 +0000 Subject: [PATCH 012/137] Pass `Stack` ref to `Completer::fetch` (#12783) # Description Adds an additional `&Stack` parameter to `Completer::fetch` so that the completers don't have to store a `Stack` themselves. I also removed unnecessary `EngineState`s from the completers, since the same `EngineState` is available in the `working_set.permanent_state` also passed to `Completer::fetch`. 
--- crates/nu-cli/src/completions/base.rs | 7 +- .../src/completions/command_completions.rs | 20 +++--- crates/nu-cli/src/completions/completer.rs | 54 +++++---------- .../src/completions/custom_completions.rs | 15 ++-- .../src/completions/directory_completions.rs | 28 +++----- .../src/completions/dotnu_completions.rs | 69 ++++++++----------- .../src/completions/file_completions.rs | 28 +++----- .../src/completions/flag_completions.rs | 5 +- .../src/completions/variable_completions.rs | 35 ++++------ crates/nu-cli/src/repl.rs | 2 +- crates/nu-cli/tests/completions.rs | 59 ++++++++-------- crates/nu-lsp/src/lib.rs | 4 +- src/ide.rs | 3 +- 13 files changed, 146 insertions(+), 183 deletions(-) diff --git a/crates/nu-cli/src/completions/base.rs b/crates/nu-cli/src/completions/base.rs index c4290b8767..0debabe688 100644 --- a/crates/nu-cli/src/completions/base.rs +++ b/crates/nu-cli/src/completions/base.rs @@ -1,13 +1,18 @@ use crate::completions::{CompletionOptions, SortBy}; -use nu_protocol::{engine::StateWorkingSet, levenshtein_distance, Span}; +use nu_protocol::{ + engine::{Stack, StateWorkingSet}, + levenshtein_distance, Span, +}; use reedline::Suggestion; // Completer trait represents the three stages of the completion // fetch, filter and sort pub trait Completer { + #[allow(clippy::too_many_arguments)] fn fetch( &mut self, working_set: &StateWorkingSet, + stack: &Stack, prefix: Vec, span: Span, offset: usize, diff --git a/crates/nu-cli/src/completions/command_completions.rs b/crates/nu-cli/src/completions/command_completions.rs index 42094f9c97..2549854540 100644 --- a/crates/nu-cli/src/completions/command_completions.rs +++ b/crates/nu-cli/src/completions/command_completions.rs @@ -4,16 +4,14 @@ use crate::{ }; use nu_parser::FlatShape; use nu_protocol::{ - engine::{CachedFile, EngineState, StateWorkingSet}, + engine::{CachedFile, Stack, StateWorkingSet}, Span, }; use reedline::Suggestion; -use std::sync::Arc; use super::SemanticSuggestion; pub struct CommandCompletion { - engine_state: Arc, flattened: Vec<(Span, FlatShape)>, flat_shape: FlatShape, force_completion_after_space: bool, @@ -21,14 +19,11 @@ pub struct CommandCompletion { impl CommandCompletion { pub fn new( - engine_state: Arc, - _: &StateWorkingSet, flattened: Vec<(Span, FlatShape)>, flat_shape: FlatShape, force_completion_after_space: bool, ) -> Self { Self { - engine_state, flattened, flat_shape, force_completion_after_space, @@ -37,13 +32,14 @@ impl CommandCompletion { fn external_command_completion( &self, + working_set: &StateWorkingSet, prefix: &str, match_algorithm: MatchAlgorithm, ) -> Vec { let mut executables = vec![]; // os agnostic way to get the PATH env var - let paths = self.engine_state.get_path_env_var(); + let paths = working_set.permanent_state.get_path_env_var(); if let Some(paths) = paths { if let Ok(paths) = paths.as_list() { @@ -52,7 +48,10 @@ impl CommandCompletion { if let Ok(mut contents) = std::fs::read_dir(path.as_ref()) { while let Some(Ok(item)) = contents.next() { - if self.engine_state.config.max_external_completion_results + if working_set + .permanent_state + .config + .max_external_completion_results > executables.len() as i64 && !executables.contains( &item @@ -114,7 +113,7 @@ impl CommandCompletion { if find_externals { let results_external = self - .external_command_completion(&partial, match_algorithm) + .external_command_completion(working_set, &partial, match_algorithm) .into_iter() .map(move |x| SemanticSuggestion { suggestion: Suggestion { @@ -161,6 +160,7 @@ impl Completer for 
CommandCompletion { fn fetch( &mut self, working_set: &StateWorkingSet, + _stack: &Stack, _prefix: Vec, span: Span, offset: usize, @@ -266,6 +266,8 @@ pub fn is_passthrough_command(working_set_file_contents: &[CachedFile]) -> bool #[cfg(test)] mod command_completions_tests { use super::*; + use nu_protocol::engine::EngineState; + use std::sync::Arc; #[test] fn test_find_non_whitespace_index() { diff --git a/crates/nu-cli/src/completions/completer.rs b/crates/nu-cli/src/completions/completer.rs index 5837772d54..348111f009 100644 --- a/crates/nu-cli/src/completions/completer.rs +++ b/crates/nu-cli/src/completions/completer.rs @@ -22,10 +22,10 @@ pub struct NuCompleter { } impl NuCompleter { - pub fn new(engine_state: Arc, stack: Stack) -> Self { + pub fn new(engine_state: Arc, stack: Arc) -> Self { Self { engine_state, - stack: stack.reset_out_dest().capture(), + stack: Stack::with_parent(stack).reset_out_dest().capture(), } } @@ -52,8 +52,15 @@ impl NuCompleter { }; // Fetch - let mut suggestions = - completer.fetch(working_set, prefix.clone(), new_span, offset, pos, &options); + let mut suggestions = completer.fetch( + working_set, + &self.stack, + prefix.clone(), + new_span, + offset, + pos, + &options, + ); // Sort suggestions = completer.sort(suggestions, prefix); @@ -175,11 +182,8 @@ impl NuCompleter { // Variables completion if prefix.starts_with(b"$") || most_left_var.is_some() { - let mut completer = VariableCompletion::new( - self.engine_state.clone(), - self.stack.clone(), - most_left_var.unwrap_or((vec![], vec![])), - ); + let mut completer = + VariableCompletion::new(most_left_var.unwrap_or((vec![], vec![]))); return self.process_completion( &mut completer, @@ -224,8 +228,6 @@ impl NuCompleter { || (flat_idx == 0 && working_set.get_span_contents(new_span).is_empty()) { let mut completer = CommandCompletion::new( - self.engine_state.clone(), - &working_set, flattened.clone(), // flat_idx, FlatShape::String, @@ -253,10 +255,7 @@ impl NuCompleter { || prev_expr_str == b"overlay use" || prev_expr_str == b"source-env" { - let mut completer = DotNuCompletion::new( - self.engine_state.clone(), - self.stack.clone(), - ); + let mut completer = DotNuCompletion::new(); return self.process_completion( &mut completer, @@ -267,10 +266,7 @@ impl NuCompleter { pos, ); } else if prev_expr_str == b"ls" { - let mut completer = FileCompletion::new( - self.engine_state.clone(), - self.stack.clone(), - ); + let mut completer = FileCompletion::new(); return self.process_completion( &mut completer, @@ -288,7 +284,6 @@ impl NuCompleter { match &flat.1 { FlatShape::Custom(decl_id) => { let mut completer = CustomCompletion::new( - self.engine_state.clone(), self.stack.clone(), *decl_id, initial_line, @@ -304,10 +299,7 @@ impl NuCompleter { ); } FlatShape::Directory => { - let mut completer = DirectoryCompletion::new( - self.engine_state.clone(), - self.stack.clone(), - ); + let mut completer = DirectoryCompletion::new(); return self.process_completion( &mut completer, @@ -319,10 +311,7 @@ impl NuCompleter { ); } FlatShape::Filepath | FlatShape::GlobPattern => { - let mut completer = FileCompletion::new( - self.engine_state.clone(), - self.stack.clone(), - ); + let mut completer = FileCompletion::new(); return self.process_completion( &mut completer, @@ -335,8 +324,6 @@ impl NuCompleter { } flat_shape => { let mut completer = CommandCompletion::new( - self.engine_state.clone(), - &working_set, flattened.clone(), // flat_idx, flat_shape.clone(), @@ -369,10 +356,7 @@ impl NuCompleter { } // Check for file 
completion - let mut completer = FileCompletion::new( - self.engine_state.clone(), - self.stack.clone(), - ); + let mut completer = FileCompletion::new(); out = self.process_completion( &mut completer, &working_set, @@ -557,7 +541,7 @@ mod completer_tests { result.err().unwrap() ); - let mut completer = NuCompleter::new(engine_state.into(), Stack::new()); + let mut completer = NuCompleter::new(engine_state.into(), Arc::new(Stack::new())); let dataset = [ ("sudo", false, "", Vec::new()), ("sudo l", true, "l", vec!["ls", "let", "lines", "loop"]), diff --git a/crates/nu-cli/src/completions/custom_completions.rs b/crates/nu-cli/src/completions/custom_completions.rs index 12a7762e94..d2ccd5191d 100644 --- a/crates/nu-cli/src/completions/custom_completions.rs +++ b/crates/nu-cli/src/completions/custom_completions.rs @@ -6,14 +6,13 @@ use nu_engine::eval_call; use nu_protocol::{ ast::{Argument, Call, Expr, Expression}, debugger::WithoutDebug, - engine::{EngineState, Stack, StateWorkingSet}, + engine::{Stack, StateWorkingSet}, PipelineData, Span, Type, Value, }; use nu_utils::IgnoreCaseExt; -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; pub struct CustomCompletion { - engine_state: Arc, stack: Stack, decl_id: usize, line: String, @@ -21,10 +20,9 @@ pub struct CustomCompletion { } impl CustomCompletion { - pub fn new(engine_state: Arc, stack: Stack, decl_id: usize, line: String) -> Self { + pub fn new(stack: Stack, decl_id: usize, line: String) -> Self { Self { - engine_state, - stack: stack.reset_out_dest().capture(), + stack, decl_id, line, sort_by: SortBy::None, @@ -35,7 +33,8 @@ impl CustomCompletion { impl Completer for CustomCompletion { fn fetch( &mut self, - _: &StateWorkingSet, + working_set: &StateWorkingSet, + _stack: &Stack, prefix: Vec, span: Span, offset: usize, @@ -47,7 +46,7 @@ impl Completer for CustomCompletion { // Call custom declaration let result = eval_call::( - &self.engine_state, + working_set.permanent_state, &mut self.stack, &Call { decl_id: self.decl_id, diff --git a/crates/nu-cli/src/completions/directory_completions.rs b/crates/nu-cli/src/completions/directory_completions.rs index e8d463c19f..024322f997 100644 --- a/crates/nu-cli/src/completions/directory_completions.rs +++ b/crates/nu-cli/src/completions/directory_completions.rs @@ -8,25 +8,16 @@ use nu_protocol::{ levenshtein_distance, Span, }; use reedline::Suggestion; -use std::{ - path::{Path, MAIN_SEPARATOR as SEP}, - sync::Arc, -}; +use std::path::{Path, MAIN_SEPARATOR as SEP}; use super::SemanticSuggestion; -#[derive(Clone)] -pub struct DirectoryCompletion { - engine_state: Arc, - stack: Stack, -} +#[derive(Clone, Default)] +pub struct DirectoryCompletion {} impl DirectoryCompletion { - pub fn new(engine_state: Arc, stack: Stack) -> Self { - Self { - engine_state, - stack, - } + pub fn new() -> Self { + Self::default() } } @@ -34,10 +25,11 @@ impl Completer for DirectoryCompletion { fn fetch( &mut self, working_set: &StateWorkingSet, + stack: &Stack, prefix: Vec, span: Span, offset: usize, - _: usize, + _pos: usize, options: &CompletionOptions, ) -> Vec { let AdjustView { prefix, span, .. 
} = adjust_if_intermediate(&prefix, working_set, span); @@ -47,10 +39,10 @@ impl Completer for DirectoryCompletion { let output: Vec<_> = directory_completion( span, &prefix, - &self.engine_state.current_work_dir(), + &working_set.permanent_state.current_work_dir(), options, - self.engine_state.as_ref(), - &self.stack, + working_set.permanent_state, + stack, ) .into_iter() .map(move |x| SemanticSuggestion { diff --git a/crates/nu-cli/src/completions/dotnu_completions.rs b/crates/nu-cli/src/completions/dotnu_completions.rs index 8927738491..c939578b41 100644 --- a/crates/nu-cli/src/completions/dotnu_completions.rs +++ b/crates/nu-cli/src/completions/dotnu_completions.rs @@ -1,39 +1,31 @@ use crate::completions::{file_path_completion, Completer, CompletionOptions, SortBy}; use nu_protocol::{ - engine::{EngineState, Stack, StateWorkingSet}, + engine::{Stack, StateWorkingSet}, Span, }; use reedline::Suggestion; -use std::{ - path::{is_separator, Path, MAIN_SEPARATOR as SEP, MAIN_SEPARATOR_STR}, - sync::Arc, -}; +use std::path::{is_separator, Path, MAIN_SEPARATOR as SEP, MAIN_SEPARATOR_STR}; use super::SemanticSuggestion; -#[derive(Clone)] -pub struct DotNuCompletion { - engine_state: Arc, - stack: Stack, -} +#[derive(Clone, Default)] +pub struct DotNuCompletion {} impl DotNuCompletion { - pub fn new(engine_state: Arc, stack: Stack) -> Self { - Self { - engine_state, - stack, - } + pub fn new() -> Self { + Self::default() } } impl Completer for DotNuCompletion { fn fetch( &mut self, - _: &StateWorkingSet, + working_set: &StateWorkingSet, + stack: &Stack, prefix: Vec, span: Span, offset: usize, - _: usize, + _pos: usize, options: &CompletionOptions, ) -> Vec { let prefix_str = String::from_utf8_lossy(&prefix).replace('`', ""); @@ -49,26 +41,25 @@ impl Completer for DotNuCompletion { let mut is_current_folder = false; // Fetch the lib dirs - let lib_dirs: Vec = - if let Some(lib_dirs) = self.engine_state.get_env_var("NU_LIB_DIRS") { - lib_dirs - .as_list() - .into_iter() - .flat_map(|it| { - it.iter().map(|x| { - x.to_path() - .expect("internal error: failed to convert lib path") - }) + let lib_dirs: Vec = if let Some(lib_dirs) = working_set.get_env_var("NU_LIB_DIRS") { + lib_dirs + .as_list() + .into_iter() + .flat_map(|it| { + it.iter().map(|x| { + x.to_path() + .expect("internal error: failed to convert lib path") }) - .map(|it| { - it.into_os_string() - .into_string() - .expect("internal error: failed to convert OS path") - }) - .collect() - } else { - vec![] - }; + }) + .map(|it| { + it.into_os_string() + .into_string() + .expect("internal error: failed to convert OS path") + }) + .collect() + } else { + vec![] + }; // Check if the base_dir is a folder // rsplit_once removes the separator @@ -85,7 +76,7 @@ impl Completer for DotNuCompletion { } else { // Fetch the current folder #[allow(deprecated)] - let current_folder = self.engine_state.current_work_dir(); + let current_folder = working_set.permanent_state.current_work_dir(); is_current_folder = true; // Add the current folder and the lib dirs into the @@ -104,8 +95,8 @@ impl Completer for DotNuCompletion { &partial, &search_dir, options, - self.engine_state.as_ref(), - &self.stack, + working_set.permanent_state, + stack, ); completions .into_iter() diff --git a/crates/nu-cli/src/completions/file_completions.rs b/crates/nu-cli/src/completions/file_completions.rs index 1a99c995db..f6205f6792 100644 --- a/crates/nu-cli/src/completions/file_completions.rs +++ b/crates/nu-cli/src/completions/file_completions.rs @@ -9,25 +9,16 @@ use 
nu_protocol::{ }; use nu_utils::IgnoreCaseExt; use reedline::Suggestion; -use std::{ - path::{Path, MAIN_SEPARATOR as SEP}, - sync::Arc, -}; +use std::path::{Path, MAIN_SEPARATOR as SEP}; use super::SemanticSuggestion; -#[derive(Clone)] -pub struct FileCompletion { - engine_state: Arc, - stack: Stack, -} +#[derive(Clone, Default)] +pub struct FileCompletion {} impl FileCompletion { - pub fn new(engine_state: Arc, stack: Stack) -> Self { - Self { - engine_state, - stack, - } + pub fn new() -> Self { + Self::default() } } @@ -35,10 +26,11 @@ impl Completer for FileCompletion { fn fetch( &mut self, working_set: &StateWorkingSet, + stack: &Stack, prefix: Vec, span: Span, offset: usize, - _: usize, + _pos: usize, options: &CompletionOptions, ) -> Vec { let AdjustView { @@ -52,10 +44,10 @@ impl Completer for FileCompletion { readjusted, span, &prefix, - &self.engine_state.current_work_dir(), + &working_set.permanent_state.current_work_dir(), options, - self.engine_state.as_ref(), - &self.stack, + working_set.permanent_state, + stack, ) .into_iter() .map(move |x| SemanticSuggestion { diff --git a/crates/nu-cli/src/completions/flag_completions.rs b/crates/nu-cli/src/completions/flag_completions.rs index 07cd89dc0a..b0dcc0963b 100644 --- a/crates/nu-cli/src/completions/flag_completions.rs +++ b/crates/nu-cli/src/completions/flag_completions.rs @@ -1,7 +1,7 @@ use crate::completions::{Completer, CompletionOptions}; use nu_protocol::{ ast::{Expr, Expression}, - engine::StateWorkingSet, + engine::{Stack, StateWorkingSet}, Span, }; use reedline::Suggestion; @@ -23,10 +23,11 @@ impl Completer for FlagCompletion { fn fetch( &mut self, working_set: &StateWorkingSet, + _stack: &Stack, prefix: Vec, span: Span, offset: usize, - _: usize, + _pos: usize, options: &CompletionOptions, ) -> Vec { // Check if it's a flag diff --git a/crates/nu-cli/src/completions/variable_completions.rs b/crates/nu-cli/src/completions/variable_completions.rs index b869e9f972..0572fe93c1 100644 --- a/crates/nu-cli/src/completions/variable_completions.rs +++ b/crates/nu-cli/src/completions/variable_completions.rs @@ -3,30 +3,20 @@ use crate::completions::{ }; use nu_engine::{column::get_columns, eval_variable}; use nu_protocol::{ - engine::{EngineState, Stack, StateWorkingSet}, + engine::{Stack, StateWorkingSet}, Span, Value, }; use reedline::Suggestion; -use std::{str, sync::Arc}; +use std::str; #[derive(Clone)] pub struct VariableCompletion { - engine_state: Arc, // TODO: Is engine state necessary? It's already a part of working set in fetch() - stack: Stack, var_context: (Vec, Vec>), // tuple with $var and the sublevels (.b.c.d) } impl VariableCompletion { - pub fn new( - engine_state: Arc, - stack: Stack, - var_context: (Vec, Vec>), - ) -> Self { - Self { - engine_state, - stack, - var_context, - } + pub fn new(var_context: (Vec, Vec>)) -> Self { + Self { var_context } } } @@ -34,10 +24,11 @@ impl Completer for VariableCompletion { fn fetch( &mut self, working_set: &StateWorkingSet, + stack: &Stack, prefix: Vec, span: Span, offset: usize, - _: usize, + _pos: usize, options: &CompletionOptions, ) -> Vec { let mut output = vec![]; @@ -54,7 +45,7 @@ impl Completer for VariableCompletion { if !var_str.is_empty() { // Completion for $env. 
if var_str == "$env" { - let env_vars = self.stack.get_env_vars(&self.engine_state); + let env_vars = stack.get_env_vars(working_set.permanent_state); // Return nested values if sublevels_count > 0 { @@ -110,8 +101,8 @@ impl Completer for VariableCompletion { if var_str == "$nu" { // Eval nu var if let Ok(nuval) = eval_variable( - &self.engine_state, - &self.stack, + working_set.permanent_state, + stack, nu_protocol::NU_VARIABLE_ID, nu_protocol::Span::new(current_span.start, current_span.end), ) { @@ -133,7 +124,7 @@ impl Completer for VariableCompletion { // Completion other variable types if let Some(var_id) = var_id { // Extract the variable value from the stack - let var = self.stack.get_var(var_id, Span::new(span.start, span.end)); + let var = stack.get_var(var_id, Span::new(span.start, span.end)); // If the value exists and it's of type Record if let Ok(value) = var { @@ -207,7 +198,11 @@ impl Completer for VariableCompletion { // Permanent state vars // for scope in &self.engine_state.scope { - for overlay_frame in self.engine_state.active_overlays(&removed_overlays).rev() { + for overlay_frame in working_set + .permanent_state + .active_overlays(&removed_overlays) + .rev() + { for v in &overlay_frame.vars { if options.match_algorithm.matches_u8_insensitive( options.case_sensitive, diff --git a/crates/nu-cli/src/repl.rs b/crates/nu-cli/src/repl.rs index 338d924a69..02609924a1 100644 --- a/crates/nu-cli/src/repl.rs +++ b/crates/nu-cli/src/repl.rs @@ -389,7 +389,7 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { .with_completer(Box::new(NuCompleter::new( engine_reference.clone(), // STACK-REFERENCE 2 - Stack::with_parent(stack_arc.clone()), + stack_arc.clone(), ))) .with_quick_completions(config.quick_completions) .with_partial_completions(config.partial_completions) diff --git a/crates/nu-cli/tests/completions.rs b/crates/nu-cli/tests/completions.rs index a22d770010..cb883b67db 100644 --- a/crates/nu-cli/tests/completions.rs +++ b/crates/nu-cli/tests/completions.rs @@ -6,7 +6,10 @@ use nu_parser::parse; use nu_protocol::{debugger::WithoutDebug, engine::StateWorkingSet, PipelineData}; use reedline::{Completer, Suggestion}; use rstest::{fixture, rstest}; -use std::path::{PathBuf, MAIN_SEPARATOR}; +use std::{ + path::{PathBuf, MAIN_SEPARATOR}, + sync::Arc, +}; use support::{ completions_helpers::{new_partial_engine, new_quote_engine}, file, folder, match_suggestions, new_engine, @@ -22,7 +25,7 @@ fn completer() -> NuCompleter { assert!(support::merge_input(record.as_bytes(), &mut engine, &mut stack, dir).is_ok()); // Instantiate a new completer - NuCompleter::new(std::sync::Arc::new(engine), stack) + NuCompleter::new(Arc::new(engine), Arc::new(stack)) } #[fixture] @@ -36,7 +39,7 @@ fn completer_strings() -> NuCompleter { assert!(support::merge_input(record.as_bytes(), &mut engine, &mut stack, dir).is_ok()); // Instantiate a new completer - NuCompleter::new(std::sync::Arc::new(engine), stack) + NuCompleter::new(Arc::new(engine), Arc::new(stack)) } #[fixture] @@ -56,7 +59,7 @@ fn extern_completer() -> NuCompleter { assert!(support::merge_input(record.as_bytes(), &mut engine, &mut stack, dir).is_ok()); // Instantiate a new completer - NuCompleter::new(std::sync::Arc::new(engine), stack) + NuCompleter::new(Arc::new(engine), Arc::new(stack)) } #[fixture] @@ -79,14 +82,14 @@ fn custom_completer() -> NuCompleter { assert!(support::merge_input(record.as_bytes(), &mut engine, &mut stack, dir).is_ok()); // Instantiate a new completer - 
NuCompleter::new(std::sync::Arc::new(engine), stack) + NuCompleter::new(Arc::new(engine), Arc::new(stack)) } #[test] fn variables_dollar_sign_with_varialblecompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "$ "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -138,7 +141,7 @@ fn dotnu_completions() { let (_, _, engine, stack) = new_engine(); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Test source completion let completion_str = "source-env ".to_string(); @@ -217,7 +220,7 @@ fn file_completions() { let (dir, dir_str, engine, stack) = new_engine(); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Test completions for the current folder let target_dir = format!("cp {dir_str}{MAIN_SEPARATOR}"); @@ -265,7 +268,7 @@ fn partial_completions() { let (dir, _, engine, stack) = new_partial_engine(); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Test completions for a folder's name let target_dir = format!("cd {}", file(dir.join("pa"))); @@ -363,7 +366,7 @@ fn partial_completions() { fn command_ls_with_filecompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "ls "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -397,7 +400,7 @@ fn command_ls_with_filecompletion() { fn command_open_with_filecompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "open "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -432,7 +435,7 @@ fn command_open_with_filecompletion() { fn command_rm_with_globcompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "rm "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -467,7 +470,7 @@ fn command_rm_with_globcompletion() { fn command_cp_with_globcompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "cp "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -502,7 +505,7 @@ fn command_cp_with_globcompletion() { fn command_save_with_filecompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "save "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -537,7 +540,7 @@ fn command_save_with_filecompletion() { fn 
command_touch_with_filecompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "touch "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -572,7 +575,7 @@ fn command_touch_with_filecompletion() { fn command_watch_with_filecompletion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "watch "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -607,7 +610,7 @@ fn command_watch_with_filecompletion() { fn file_completion_quoted() { let (_, _, engine, stack) = new_quote_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "open "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -645,7 +648,7 @@ fn flag_completions() { let (_, _, engine, stack) = new_engine(); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Test completions for the 'ls' flags let suggestions = completer.complete("ls -", 4); @@ -680,7 +683,7 @@ fn folder_with_directorycompletions() { let (dir, dir_str, engine, stack) = new_engine(); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Test completions for the current folder let target_dir = format!("cd {dir_str}{MAIN_SEPARATOR}"); @@ -709,7 +712,7 @@ fn variables_completions() { assert!(support::merge_input(record.as_bytes(), &mut engine, &mut stack, dir).is_ok()); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Test completions for $nu let suggestions = completer.complete("$nu.", 4); @@ -815,7 +818,7 @@ fn alias_of_command_and_flags() { let alias = r#"alias ll = ls -l"#; assert!(support::merge_input(alias.as_bytes(), &mut engine, &mut stack, dir).is_ok()); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let suggestions = completer.complete("ll t", 4); #[cfg(windows)] @@ -834,7 +837,7 @@ fn alias_of_basic_command() { let alias = r#"alias ll = ls "#; assert!(support::merge_input(alias.as_bytes(), &mut engine, &mut stack, dir).is_ok()); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let suggestions = completer.complete("ll t", 4); #[cfg(windows)] @@ -856,7 +859,7 @@ fn alias_of_another_alias() { let alias = r#"alias lf = ll -f"#; assert!(support::merge_input(alias.as_bytes(), &mut engine, &mut stack, dir).is_ok()); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let suggestions = completer.complete("lf t", 4); #[cfg(windows)] @@ -890,7 +893,7 @@ fn run_external_completion(completer: &str, input: &str) -> Vec { assert!(engine_state.merge_env(&mut stack, 
&dir).is_ok()); // Instantiate a new completer - let mut completer = NuCompleter::new(std::sync::Arc::new(engine_state), stack); + let mut completer = NuCompleter::new(Arc::new(engine_state), Arc::new(stack)); completer.complete(input, input.len()) } @@ -899,7 +902,7 @@ fn run_external_completion(completer: &str, input: &str) -> Vec { fn unknown_command_completion() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let target_dir = "thiscommanddoesnotexist "; let suggestions = completer.complete(target_dir, target_dir.len()); @@ -962,7 +965,7 @@ fn flagcompletion_triggers_after_cursor_piped(mut completer: NuCompleter) { fn filecompletions_triggers_after_cursor() { let (_, _, engine, stack) = new_engine(); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); let suggestions = completer.complete("cp test_c", 3); @@ -1071,7 +1074,7 @@ fn alias_offset_bug_7648() { let alias = r#"alias ea = ^$env.EDITOR /tmp/test.s"#; assert!(support::merge_input(alias.as_bytes(), &mut engine, &mut stack, dir).is_ok()); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Issue #7648 // Nushell crashes when an alias name is shorter than the alias command @@ -1090,7 +1093,7 @@ fn alias_offset_bug_7754() { let alias = r#"alias ll = ls -l"#; assert!(support::merge_input(alias.as_bytes(), &mut engine, &mut stack, dir).is_ok()); - let mut completer = NuCompleter::new(std::sync::Arc::new(engine), stack); + let mut completer = NuCompleter::new(Arc::new(engine), Arc::new(stack)); // Issue #7754 // Nushell crashes when an alias name is shorter than the alias command diff --git a/crates/nu-lsp/src/lib.rs b/crates/nu-lsp/src/lib.rs index 939bb0e4e0..47535d9bd4 100644 --- a/crates/nu-lsp/src/lib.rs +++ b/crates/nu-lsp/src/lib.rs @@ -552,8 +552,8 @@ impl LanguageServer { ¶ms.text_document_position.text_document.uri, )?; - let stack = Stack::new(); - let mut completer = NuCompleter::new(Arc::new(engine_state.clone()), stack); + let mut completer = + NuCompleter::new(Arc::new(engine_state.clone()), Arc::new(Stack::new())); let location = Self::lsp_position_to_location(¶ms.text_document_position.position, rope_of_file); diff --git a/src/ide.rs b/src/ide.rs index 2b39dda946..73419ed857 100644 --- a/src/ide.rs +++ b/src/ide.rs @@ -606,8 +606,7 @@ pub fn hover(engine_state: &mut EngineState, file_path: &str, location: &Value) } pub fn complete(engine_reference: Arc, file_path: &str, location: &Value) { - let stack = Stack::new(); - let mut completer = NuCompleter::new(engine_reference, stack); + let mut completer = NuCompleter::new(engine_reference, Arc::new(Stack::new())); let file = std::fs::read(file_path) .into_diagnostic() From 1b2e680059c55c49ff17f081cabd1d0285bcf356 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 9 May 2024 23:09:44 +0000 Subject: [PATCH 013/137] Fix syntax highlighting for `not` (#12815) # Description Fixes #12813 where a panic occurs when syntax highlighting `not`. Also fixes #12814 where syntax highlighting for `not` no longer works. # User-Facing Changes Bug fix. 
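For context, my reading of the diff below: the old match arm bound the inner expression to a name that shadowed the outer `expr`, so the span pushed for the operator was computed from the operand instead of the `not` keyword. A toy sketch of the corrected behavior (illustrative names, not the real `nu-parser` types):

```rust
struct Expression {
    span: (usize, usize),
    expr: Expr,
}

enum Expr {
    Int,
    UnaryNot(Box<Expression>),
}

fn flatten(expr: &Expression, out: &mut Vec<(usize, usize)>) {
    match &expr.expr {
        Expr::Int => out.push(expr.span),
        // The buggy version bound this as `expr`, shadowing the parameter above,
        // so the operator span below started at the operand, not at `not`.
        Expr::UnaryNot(not) => {
            out.push((expr.span.0, expr.span.0 + 3)); // span of the `not` keyword
            flatten(not, out);
        }
    }
}

fn main() {
    // models `not false`: `not` at 0..3, `false` at 4..9
    let operand = Expression { span: (4, 9), expr: Expr::Int };
    let whole = Expression { span: (0, 9), expr: Expr::UnaryNot(Box::new(operand)) };
    let mut out = Vec::new();
    flatten(&whole, &mut out);
    println!("{out:?}"); // [(0, 3), (4, 9)] -- keyword first, then the operand
}
```

With the fix, `'not false' | nu-highlight | ansi strip` round-trips to `not false` again, which is what the new test below asserts.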
--- crates/nu-cli/tests/commands/mod.rs | 1 + crates/nu-cli/tests/commands/nu_highlight.rs | 7 +++++++ crates/nu-cli/tests/{completions.rs => completions/mod.rs} | 0 .../tests/{ => completions}/support/completions_helpers.rs | 0 crates/nu-cli/tests/{ => completions}/support/mod.rs | 0 crates/nu-cli/tests/main.rs | 2 ++ crates/nu-parser/src/flatten.rs | 4 ++-- 7 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 crates/nu-cli/tests/commands/mod.rs create mode 100644 crates/nu-cli/tests/commands/nu_highlight.rs rename crates/nu-cli/tests/{completions.rs => completions/mod.rs} (100%) rename crates/nu-cli/tests/{ => completions}/support/completions_helpers.rs (100%) rename crates/nu-cli/tests/{ => completions}/support/mod.rs (100%) create mode 100644 crates/nu-cli/tests/main.rs diff --git a/crates/nu-cli/tests/commands/mod.rs b/crates/nu-cli/tests/commands/mod.rs new file mode 100644 index 0000000000..00488f0b9e --- /dev/null +++ b/crates/nu-cli/tests/commands/mod.rs @@ -0,0 +1 @@ +mod nu_highlight; diff --git a/crates/nu-cli/tests/commands/nu_highlight.rs b/crates/nu-cli/tests/commands/nu_highlight.rs new file mode 100644 index 0000000000..bd185a8634 --- /dev/null +++ b/crates/nu-cli/tests/commands/nu_highlight.rs @@ -0,0 +1,7 @@ +use nu_test_support::nu; + +#[test] +fn nu_highlight_not_expr() { + let actual = nu!("'not false' | nu-highlight | ansi strip"); + assert_eq!(actual.out, "not false"); +} diff --git a/crates/nu-cli/tests/completions.rs b/crates/nu-cli/tests/completions/mod.rs similarity index 100% rename from crates/nu-cli/tests/completions.rs rename to crates/nu-cli/tests/completions/mod.rs diff --git a/crates/nu-cli/tests/support/completions_helpers.rs b/crates/nu-cli/tests/completions/support/completions_helpers.rs similarity index 100% rename from crates/nu-cli/tests/support/completions_helpers.rs rename to crates/nu-cli/tests/completions/support/completions_helpers.rs diff --git a/crates/nu-cli/tests/support/mod.rs b/crates/nu-cli/tests/completions/support/mod.rs similarity index 100% rename from crates/nu-cli/tests/support/mod.rs rename to crates/nu-cli/tests/completions/support/mod.rs diff --git a/crates/nu-cli/tests/main.rs b/crates/nu-cli/tests/main.rs new file mode 100644 index 0000000000..a040a731f3 --- /dev/null +++ b/crates/nu-cli/tests/main.rs @@ -0,0 +1,2 @@ +mod commands; +mod completions; diff --git a/crates/nu-parser/src/flatten.rs b/crates/nu-parser/src/flatten.rs index e70a48e9f1..92b424783a 100644 --- a/crates/nu-parser/src/flatten.rs +++ b/crates/nu-parser/src/flatten.rs @@ -180,12 +180,12 @@ fn flatten_expression_into( flatten_expression_into(working_set, op, output); flatten_expression_into(working_set, rhs, output); } - Expr::UnaryNot(expr) => { + Expr::UnaryNot(not) => { output.push(( Span::new(expr.span.start, expr.span.start + 3), FlatShape::Operator, )); - flatten_expression_into(working_set, expr, output); + flatten_expression_into(working_set, not, output); } Expr::Closure(block_id) => { let outer_span = expr.span; From 72d3860d05e0cc62a22d9316f9028d01f309fa13 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 9 May 2024 23:29:27 +0000 Subject: [PATCH 014/137] Refactor the CLI code a bit (#12782) # Description Refactors the code in `nu-cli`, `main.rs`, `run.rs`, and few others. Namely, I added `EngineState::generate_nu_constant` function to eliminate some duplicate code. Otherwise, I changed a bunch of areas to return errors instead of calling `std::process::exit`. # User-Facing Changes Should be none. 
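A minimal sketch of the second change (toy `ShellError` and function; the real signatures are in the diff below): bubbling errors up instead of exiting means the top level decides how to report and which exit code to use.

```rust
#[derive(Debug)]
struct ShellError(String);

// Before (simplified): the helper reported the error itself and called
// std::process::exit(1), so callers never got a chance to handle it.

// After (simplified): the caller receives the error.
fn evaluate_commands(src: &str) -> Result<(), ShellError> {
    if src.is_empty() {
        return Err(ShellError("no commands given".into()));
    }
    Ok(())
}

fn main() {
    if let Err(err) = evaluate_commands("") {
        eprintln!("Error: {err:?}");
        std::process::exit(1); // the exit decision now lives in one place
    }
}
```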
--- benches/benchmarks.rs | 8 +- crates/nu-cli/src/config_files.rs | 23 +-- crates/nu-cli/src/eval_cmds.rs | 53 ++--- crates/nu-cli/src/eval_file.rs | 192 ++++-------------- crates/nu-cli/src/prompt_update.rs | 7 +- crates/nu-cli/src/repl.rs | 8 +- crates/nu-cli/src/util.rs | 21 +- .../support/completions_helpers.rs | 7 +- crates/nu-cmd-base/src/util.rs | 7 +- crates/nu-engine/src/env.rs | 8 +- crates/nu-lsp/src/diagnostics.rs | 9 +- crates/nu-protocol/src/engine/engine_state.rs | 5 +- crates/nu-protocol/src/errors/cli_error.rs | 1 - crates/nu-protocol/src/eval_const.rs | 4 +- src/command.rs | 1 - src/config_files.rs | 14 +- src/ide.rs | 19 +- src/main.rs | 13 +- src/run.rs | 51 +++-- 19 files changed, 146 insertions(+), 305 deletions(-) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 9de7cc5758..84552daef9 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -4,8 +4,7 @@ use nu_plugin_protocol::{PluginCallResponse, PluginOutput}; use nu_protocol::{ engine::{EngineState, Stack}, - eval_const::create_nu_constant, - PipelineData, Span, Spanned, Value, NU_VARIABLE_ID, + PipelineData, Span, Spanned, Value, }; use nu_std::load_standard_library; use nu_utils::{get_default_config, get_default_env}; @@ -30,9 +29,7 @@ fn setup_engine() -> EngineState { // parsing config.nu breaks without PWD set, so set a valid path engine_state.add_env_var("PWD".into(), Value::string(cwd, Span::test_data())); - let nu_const = create_nu_constant(&engine_state, Span::unknown()) - .expect("Failed to create nushell constant."); - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); engine_state } @@ -86,6 +83,7 @@ fn bench_command( b.iter(move || { let mut stack = stack.clone(); let mut engine = engine.clone(); + #[allow(clippy::unit_arg)] black_box( evaluate_commands( &commands, diff --git a/crates/nu-cli/src/config_files.rs b/crates/nu-cli/src/config_files.rs index 091fe7daa3..e89fa6c1d1 100644 --- a/crates/nu-cli/src/config_files.rs +++ b/crates/nu-cli/src/config_files.rs @@ -1,12 +1,12 @@ use crate::util::eval_source; #[cfg(feature = "plugin")] use nu_path::canonicalize_with; -use nu_protocol::{ - engine::{EngineState, Stack, StateWorkingSet}, - report_error, HistoryFileFormat, PipelineData, -}; #[cfg(feature = "plugin")] -use nu_protocol::{ParseError, PluginRegistryFile, Spanned}; +use nu_protocol::{engine::StateWorkingSet, report_error, ParseError, PluginRegistryFile, Spanned}; +use nu_protocol::{ + engine::{EngineState, Stack}, + report_error_new, HistoryFileFormat, PipelineData, +}; #[cfg(feature = "plugin")] use nu_utils::utils::perf; use std::path::PathBuf; @@ -25,10 +25,9 @@ pub fn read_plugin_file( plugin_file: Option>, storage_path: &str, ) { + use nu_protocol::ShellError; use std::path::Path; - use nu_protocol::{report_error_new, ShellError}; - let span = plugin_file.as_ref().map(|s| s.span); // Check and warn + abort if this is a .nu plugin file @@ -239,13 +238,11 @@ pub fn eval_config_contents( match engine_state.cwd(Some(stack)) { Ok(cwd) => { if let Err(e) = engine_state.merge_env(stack, cwd) { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); } } Err(e) => { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); } } } @@ -266,8 +263,8 @@ pub(crate) fn get_history_path(storage_path: &str, mode: HistoryFileFormat) -> O #[cfg(feature = "plugin")] pub fn 
migrate_old_plugin_file(engine_state: &EngineState, storage_path: &str) -> bool { use nu_protocol::{ - report_error_new, PluginExample, PluginIdentity, PluginRegistryItem, - PluginRegistryItemData, PluginSignature, ShellError, + PluginExample, PluginIdentity, PluginRegistryItem, PluginRegistryItemData, PluginSignature, + ShellError, }; use std::collections::BTreeMap; diff --git a/crates/nu-cli/src/eval_cmds.rs b/crates/nu-cli/src/eval_cmds.rs index 1e3cc70348..0b0b5f8ddf 100644 --- a/crates/nu-cli/src/eval_cmds.rs +++ b/crates/nu-cli/src/eval_cmds.rs @@ -5,8 +5,9 @@ use nu_parser::parse; use nu_protocol::{ debugger::WithoutDebug, engine::{EngineState, Stack, StateWorkingSet}, - report_error, PipelineData, Spanned, Value, + report_error, PipelineData, ShellError, Spanned, Value, }; +use std::sync::Arc; /// Run a command (or commands) given to us by the user pub fn evaluate_commands( @@ -16,13 +17,9 @@ pub fn evaluate_commands( input: PipelineData, table_mode: Option, no_newline: bool, -) -> Result> { +) -> Result<(), ShellError> { // Translate environment variables from Strings to Values - if let Some(e) = convert_env_values(engine_state, stack) { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); - std::process::exit(1); - } + convert_env_values(engine_state, stack)?; // Parse the source code let (block, delta) = { @@ -41,7 +38,6 @@ pub fn evaluate_commands( if let Some(err) = working_set.parse_errors.first() { report_error(&working_set, err); - std::process::exit(1); } @@ -49,35 +45,26 @@ pub fn evaluate_commands( }; // Update permanent state - if let Err(err) = engine_state.merge_delta(delta) { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &err); - } + engine_state.merge_delta(delta)?; // Run the block - let exit_code = match eval_block::(engine_state, stack, &block, input) { - Ok(pipeline_data) => { - let mut config = engine_state.get_config().clone(); - if let Some(t_mode) = table_mode { - config.table_mode = t_mode.coerce_str()?.parse().unwrap_or_default(); - } - crate::eval_file::print_table_or_error( - engine_state, - stack, - pipeline_data, - &mut config, - no_newline, - ) - } - Err(err) => { - let working_set = StateWorkingSet::new(engine_state); + let pipeline = eval_block::(engine_state, stack, &block, input)?; - report_error(&working_set, &err); - std::process::exit(1); - } - }; + if let PipelineData::Value(Value::Error { error, .. }, ..) 
= pipeline { + return Err(*error); + } + + if let Some(t_mode) = table_mode { + Arc::make_mut(&mut engine_state.config).table_mode = + t_mode.coerce_str()?.parse().unwrap_or_default(); + } + + let exit_code = pipeline.print(engine_state, stack, no_newline, false)?; + if exit_code != 0 { + std::process::exit(exit_code as i32); + } info!("evaluate {}:{}:{}", file!(), line!(), column!()); - Ok(exit_code) + Ok(()) } diff --git a/crates/nu-cli/src/eval_file.rs b/crates/nu-cli/src/eval_file.rs index 8107de71a5..7483c6bc33 100644 --- a/crates/nu-cli/src/eval_file.rs +++ b/crates/nu-cli/src/eval_file.rs @@ -1,15 +1,14 @@ use crate::util::eval_source; use log::{info, trace}; -use miette::{IntoDiagnostic, Result}; use nu_engine::{convert_env_values, eval_block}; use nu_parser::parse; use nu_path::canonicalize_with; use nu_protocol::{ debugger::WithoutDebug, engine::{EngineState, Stack, StateWorkingSet}, - report_error, Config, PipelineData, ShellError, Span, Value, + report_error, PipelineData, ShellError, Span, Value, }; -use std::{io::Write, sync::Arc}; +use std::sync::Arc; /// Entry point for evaluating a file. /// @@ -21,73 +20,40 @@ pub fn evaluate_file( engine_state: &mut EngineState, stack: &mut Stack, input: PipelineData, -) -> Result<()> { +) -> Result<(), ShellError> { // Convert environment variables from Strings to Values and store them in the engine state. - if let Some(e) = convert_env_values(engine_state, stack) { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); - std::process::exit(1); - } + convert_env_values(engine_state, stack)?; let cwd = engine_state.cwd_as_string(Some(stack))?; - let file_path = canonicalize_with(&path, cwd).unwrap_or_else(|e| { - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, - &ShellError::FileNotFoundCustom { - msg: format!("Could not access file '{}': {:?}", path, e.to_string()), - span: Span::unknown(), - }, - ); - std::process::exit(1); - }); + let file_path = + canonicalize_with(&path, cwd).map_err(|err| ShellError::FileNotFoundCustom { + msg: format!("Could not access file '{path}': {err}"), + span: Span::unknown(), + })?; - let file_path_str = file_path.to_str().unwrap_or_else(|| { - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, - &ShellError::NonUtf8Custom { - msg: format!( - "Input file name '{}' is not valid UTF8", - file_path.to_string_lossy() - ), - span: Span::unknown(), - }, - ); - std::process::exit(1); - }); + let file_path_str = file_path + .to_str() + .ok_or_else(|| ShellError::NonUtf8Custom { + msg: format!( + "Input file name '{}' is not valid UTF8", + file_path.to_string_lossy() + ), + span: Span::unknown(), + })?; - let file = std::fs::read(&file_path) - .into_diagnostic() - .unwrap_or_else(|e| { - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, - &ShellError::FileNotFoundCustom { - msg: format!( - "Could not read file '{}': {:?}", - file_path_str, - e.to_string() - ), - span: Span::unknown(), - }, - ); - std::process::exit(1); - }); + let file = std::fs::read(&file_path).map_err(|err| ShellError::FileNotFoundCustom { + msg: format!("Could not read file '{file_path_str}': {err}"), + span: Span::unknown(), + })?; engine_state.file = Some(file_path.clone()); - let parent = file_path.parent().unwrap_or_else(|| { - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, - &ShellError::FileNotFoundCustom { - msg: format!("The file path '{file_path_str}' does 
not have a parent"), - span: Span::unknown(), - }, - ); - std::process::exit(1); - }); + let parent = file_path + .parent() + .ok_or_else(|| ShellError::FileNotFoundCustom { + msg: format!("The file path '{file_path_str}' does not have a parent"), + span: Span::unknown(), + })?; stack.add_env_var( "FILE_PWD".to_string(), @@ -127,42 +93,25 @@ pub fn evaluate_file( } // Merge the changes into the engine state. - engine_state - .merge_delta(working_set.delta) - .expect("merging delta into engine_state should succeed"); + engine_state.merge_delta(working_set.delta)?; // Check if the file contains a main command. if engine_state.find_decl(b"main", &[]).is_some() { // Evaluate the file, but don't run main yet. - let pipeline_data = - eval_block::(engine_state, stack, &block, PipelineData::empty()); - let pipeline_data = match pipeline_data { - Err(ShellError::Return { .. }) => { - // Allow early return before main is run. - return Ok(()); - } - x => x, - } - .unwrap_or_else(|e| { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); - std::process::exit(1); - }); - - // Print the pipeline output of the file. - // The pipeline output of a file is the pipeline output of its last command. - let result = pipeline_data.print(engine_state, stack, true, false); - match result { - Err(err) => { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &err); - std::process::exit(1); - } - Ok(exit_code) => { - if exit_code != 0 { - std::process::exit(exit_code as i32); + let pipeline = + match eval_block::(engine_state, stack, &block, PipelineData::empty()) { + Ok(data) => data, + Err(ShellError::Return { .. }) => { + // Allow early return before main is run. + return Ok(()); } - } + Err(err) => return Err(err), + }; + + // Print the pipeline output of the last command of the file. + let exit_code = pipeline.print(engine_state, stack, true, false)?; + if exit_code != 0 { + std::process::exit(exit_code as i32); } // Invoke the main command with arguments. @@ -186,60 +135,3 @@ pub fn evaluate_file( Ok(()) } - -pub(crate) fn print_table_or_error( - engine_state: &mut EngineState, - stack: &mut Stack, - mut pipeline_data: PipelineData, - config: &mut Config, - no_newline: bool, -) -> Option { - let exit_code = match &mut pipeline_data { - PipelineData::ExternalStream { exit_code, .. } => exit_code.take(), - _ => None, - }; - - // Change the engine_state config to use the passed in configuration - engine_state.set_config(config.clone()); - - if let PipelineData::Value(Value::Error { error, .. }, ..) = &pipeline_data { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &**error); - std::process::exit(1); - } - - // We don't need to do anything special to print a table because print() handles it - print_or_exit(pipeline_data, engine_state, stack, no_newline); - - // Make sure everything has finished - if let Some(exit_code) = exit_code { - let mut exit_code: Vec<_> = exit_code.into_iter().collect(); - exit_code - .pop() - .and_then(|last_exit_code| match last_exit_code { - Value::Int { val: code, .. 
} => Some(code), - _ => None, - }) - } else { - None - } -} - -fn print_or_exit( - pipeline_data: PipelineData, - engine_state: &EngineState, - stack: &mut Stack, - no_newline: bool, -) { - let result = pipeline_data.print(engine_state, stack, no_newline, false); - - let _ = std::io::stdout().flush(); - let _ = std::io::stderr().flush(); - - if let Err(error) = result { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &error); - let _ = std::io::stderr().flush(); - std::process::exit(1); - } -} diff --git a/crates/nu-cli/src/prompt_update.rs b/crates/nu-cli/src/prompt_update.rs index 827bff0e5a..0c24b8282a 100644 --- a/crates/nu-cli/src/prompt_update.rs +++ b/crates/nu-cli/src/prompt_update.rs @@ -2,8 +2,8 @@ use crate::NushellPrompt; use log::trace; use nu_engine::ClosureEvalOnce; use nu_protocol::{ - engine::{EngineState, Stack, StateWorkingSet}, - report_error, Config, PipelineData, Value, + engine::{EngineState, Stack}, + report_error_new, Config, PipelineData, Value, }; use reedline::Prompt; @@ -77,8 +77,7 @@ fn get_prompt_string( result .map_err(|err| { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &err); + report_error_new(engine_state, &err); }) .ok() } diff --git a/crates/nu-cli/src/repl.rs b/crates/nu-cli/src/repl.rs index 02609924a1..3fe51db496 100644 --- a/crates/nu-cli/src/repl.rs +++ b/crates/nu-cli/src/repl.rs @@ -26,9 +26,8 @@ use nu_parser::{lex, parse, trim_quotes_str}; use nu_protocol::{ config::NuCursorShape, engine::{EngineState, Stack, StateWorkingSet}, - eval_const::create_nu_constant, report_error_new, HistoryConfig, HistoryFileFormat, PipelineData, ShellError, Span, Spanned, - Value, NU_VARIABLE_ID, + Value, }; use nu_utils::{ filesystem::{have_permission, PermissionResult}, @@ -87,7 +86,7 @@ pub fn evaluate_repl( let start_time = std::time::Instant::now(); // Translate environment variables from Strings to Values - if let Some(e) = convert_env_values(engine_state, &unique_stack) { + if let Err(e) = convert_env_values(engine_state, &unique_stack) { report_error_new(engine_state, &e); } perf( @@ -145,8 +144,7 @@ pub fn evaluate_repl( engine_state.set_startup_time(entire_start_time.elapsed().as_nanos() as i64); // Regenerate the $nu constant to contain the startup time and any other potential updates - let nu_const = create_nu_constant(engine_state, Span::unknown())?; - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); if load_std_lib.is_none() && engine_state.get_config().show_banner { eval_source( diff --git a/crates/nu-cli/src/util.rs b/crates/nu-cli/src/util.rs index 8ff4ef35ea..2f996691c9 100644 --- a/crates/nu-cli/src/util.rs +++ b/crates/nu-cli/src/util.rs @@ -39,9 +39,8 @@ fn gather_env_vars( init_cwd: &Path, ) { fn report_capture_error(engine_state: &EngineState, env_str: &str, msg: &str) { - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, + report_error_new( + engine_state, &ShellError::GenericError { error: format!("Environment variable was not captured: {env_str}"), msg: "".into(), @@ -71,9 +70,8 @@ fn gather_env_vars( } None => { // Could not capture current working directory - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, + report_error_new( + engine_state, &ShellError::GenericError { error: "Current directory is not a valid utf-8 path".into(), msg: "".into(), @@ -278,10 +276,7 @@ pub fn eval_source( match result { Err(err) => { - let working_set = 
StateWorkingSet::new(engine_state); - - report_error(&working_set, &err); - + report_error_new(engine_state, &err); return false; } Ok(exit_code) => { @@ -297,11 +292,7 @@ pub fn eval_source( } Err(err) => { set_last_exit_code(stack, 1); - - let working_set = StateWorkingSet::new(engine_state); - - report_error(&working_set, &err); - + report_error_new(engine_state, &err); return false; } } diff --git a/crates/nu-cli/tests/completions/support/completions_helpers.rs b/crates/nu-cli/tests/completions/support/completions_helpers.rs index bdc52739ee..47f46ab00e 100644 --- a/crates/nu-cli/tests/completions/support/completions_helpers.rs +++ b/crates/nu-cli/tests/completions/support/completions_helpers.rs @@ -3,8 +3,7 @@ use nu_parser::parse; use nu_protocol::{ debugger::WithoutDebug, engine::{EngineState, Stack, StateWorkingSet}, - eval_const::create_nu_constant, - PipelineData, ShellError, Span, Value, NU_VARIABLE_ID, + PipelineData, ShellError, Span, Value, }; use nu_test_support::fs; use reedline::Suggestion; @@ -28,9 +27,7 @@ pub fn new_engine() -> (PathBuf, String, EngineState, Stack) { let mut engine_state = create_default_context(); // Add $nu - let nu_const = - create_nu_constant(&engine_state, Span::test_data()).expect("Failed creating $nu"); - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); // New stack let mut stack = Stack::new(); diff --git a/crates/nu-cmd-base/src/util.rs b/crates/nu-cmd-base/src/util.rs index 619237a21c..4d19fdb3c6 100644 --- a/crates/nu-cmd-base/src/util.rs +++ b/crates/nu-cmd-base/src/util.rs @@ -1,6 +1,6 @@ use nu_protocol::{ - engine::{EngineState, Stack, StateWorkingSet}, - report_error, Range, ShellError, Span, Value, + engine::{EngineState, Stack}, + report_error_new, Range, ShellError, Span, Value, }; use std::{ops::Bound, path::PathBuf}; @@ -14,8 +14,7 @@ pub fn get_init_cwd() -> PathBuf { pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf { engine_state.cwd(Some(stack)).unwrap_or_else(|e| { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); crate::util::get_init_cwd() }) } diff --git a/crates/nu-engine/src/env.rs b/crates/nu-engine/src/env.rs index ae226c4421..44692dd131 100644 --- a/crates/nu-engine/src/env.rs +++ b/crates/nu-engine/src/env.rs @@ -32,7 +32,7 @@ enum ConversionResult { /// It returns Option instead of Result since we do want to translate all the values we can and /// skip errors. This function is called in the main() so we want to keep running, we cannot just /// exit. 
-pub fn convert_env_values(engine_state: &mut EngineState, stack: &Stack) -> Option { +pub fn convert_env_values(engine_state: &mut EngineState, stack: &Stack) -> Result<(), ShellError> { let mut error = None; let mut new_scope = HashMap::new(); @@ -85,7 +85,11 @@ pub fn convert_env_values(engine_state: &mut EngineState, stack: &Stack) -> Opti }); } - error + if let Some(err) = error { + Err(err) + } else { + Ok(()) + } } /// Translate one environment variable from Value to String diff --git a/crates/nu-lsp/src/diagnostics.rs b/crates/nu-lsp/src/diagnostics.rs index a9bbab77a9..423fffbf6c 100644 --- a/crates/nu-lsp/src/diagnostics.rs +++ b/crates/nu-lsp/src/diagnostics.rs @@ -7,8 +7,7 @@ use miette::{IntoDiagnostic, Result}; use nu_parser::parse; use nu_protocol::{ engine::{EngineState, StateWorkingSet}, - eval_const::create_nu_constant, - Span, Value, NU_VARIABLE_ID, + Value, }; impl LanguageServer { @@ -19,11 +18,7 @@ impl LanguageServer { ) -> Result<()> { let cwd = std::env::current_dir().expect("Could not get current working directory."); engine_state.add_env_var("PWD".into(), Value::test_string(cwd.to_string_lossy())); - - let Ok(nu_const) = create_nu_constant(engine_state, Span::unknown()) else { - return Ok(()); - }; - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); let mut working_set = StateWorkingSet::new(engine_state); diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 1593b3341a..4a0ff4c4ae 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -6,6 +6,7 @@ use crate::{ CachedFile, Command, CommandType, EnvVars, OverlayFrame, ScopeFrame, Stack, StateDelta, Variable, Visibility, DEFAULT_OVERLAY_NAME, }, + eval_const::create_nu_constant, BlockId, Category, Config, DeclId, Example, FileId, HistoryConfig, Module, ModuleId, OverlayId, ShellError, Signature, Span, Type, Value, VarId, VirtualPathId, }; @@ -753,8 +754,8 @@ impl EngineState { var.const_val.as_ref() } - pub fn set_variable_const_val(&mut self, var_id: VarId, val: Value) { - self.vars[var_id].const_val = Some(val); + pub fn generate_nu_constant(&mut self) { + self.vars[NU_VARIABLE_ID].const_val = Some(create_nu_constant(self, Span::unknown())); } pub fn get_decl(&self, decl_id: DeclId) -> &dyn Command { diff --git a/crates/nu-protocol/src/errors/cli_error.rs b/crates/nu-protocol/src/errors/cli_error.rs index 6be1ebce40..003564f933 100644 --- a/crates/nu-protocol/src/errors/cli_error.rs +++ b/crates/nu-protocol/src/errors/cli_error.rs @@ -41,7 +41,6 @@ pub fn report_error_new( error: &(dyn miette::Diagnostic + Send + Sync + 'static), ) { let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, error); } diff --git a/crates/nu-protocol/src/eval_const.rs b/crates/nu-protocol/src/eval_const.rs index e45e3b7e4b..140a8303d9 100644 --- a/crates/nu-protocol/src/eval_const.rs +++ b/crates/nu-protocol/src/eval_const.rs @@ -12,7 +12,7 @@ use std::{ }; /// Create a Value for `$nu`. 
-pub fn create_nu_constant(engine_state: &EngineState, span: Span) -> Result { +pub(crate) fn create_nu_constant(engine_state: &EngineState, span: Span) -> Value { fn canonicalize_path(engine_state: &EngineState, path: &Path) -> PathBuf { #[allow(deprecated)] let cwd = engine_state.current_work_dir(); @@ -200,7 +200,7 @@ pub fn create_nu_constant(engine_state: &EngineState, span: Span) -> Result { if let Err(e) = engine_state.merge_env(stack, cwd) { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); } } Err(e) => { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); } } } @@ -193,13 +191,11 @@ fn eval_default_config( match engine_state.cwd(Some(stack)) { Ok(cwd) => { if let Err(e) = engine_state.merge_env(stack, cwd) { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); } } Err(e) => { - let working_set = StateWorkingSet::new(engine_state); - report_error(&working_set, &e); + report_error_new(engine_state, &e); } } } diff --git a/src/ide.rs b/src/ide.rs index 73419ed857..a3474fe3b6 100644 --- a/src/ide.rs +++ b/src/ide.rs @@ -3,8 +3,7 @@ use nu_cli::NuCompleter; use nu_parser::{flatten_block, parse, FlatShape}; use nu_protocol::{ engine::{EngineState, Stack, StateWorkingSet}, - eval_const::create_nu_constant, - report_error, DeclId, ShellError, Span, Value, VarId, NU_VARIABLE_ID, + report_error_new, DeclId, ShellError, Span, Value, VarId, }; use reedline::Completer; use serde_json::{json, Value as JsonValue}; @@ -56,9 +55,8 @@ fn read_in_file<'a>( let file = std::fs::read(file_path) .into_diagnostic() .unwrap_or_else(|e| { - let working_set = StateWorkingSet::new(engine_state); - report_error( - &working_set, + report_error_new( + engine_state, &ShellError::FileNotFoundCustom { msg: format!("Could not read file '{}': {:?}", file_path, e.to_string()), span: Span::unknown(), @@ -77,16 +75,7 @@ fn read_in_file<'a>( pub fn check(engine_state: &mut EngineState, file_path: &str, max_errors: &Value) { let cwd = std::env::current_dir().expect("Could not get current working directory."); engine_state.add_env_var("PWD".into(), Value::test_string(cwd.to_string_lossy())); - let working_set = StateWorkingSet::new(engine_state); - - let nu_const = match create_nu_constant(engine_state, Span::unknown()) { - Ok(nu_const) => nu_const, - Err(err) => { - report_error(&working_set, &err); - std::process::exit(1); - } - }; - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); let mut working_set = StateWorkingSet::new(engine_state); let file = std::fs::read(file_path); diff --git a/src/main.rs b/src/main.rs index b7e70f1ed4..714b932df8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,8 +27,8 @@ use nu_cmd_base::util::get_init_cwd; use nu_lsp::LanguageServer; use nu_path::canonicalize_with; use nu_protocol::{ - engine::EngineState, eval_const::create_nu_constant, report_error_new, util::BufferedReader, - PipelineData, RawStream, ShellError, Span, Value, NU_VARIABLE_ID, + engine::EngineState, report_error_new, util::BufferedReader, PipelineData, RawStream, + ShellError, Span, Value, }; use nu_std::load_standard_library; use nu_utils::utils::perf; @@ -378,8 +378,7 @@ fn main() -> Result<()> { start_time = std::time::Instant::now(); // Set up the $nu constant before evaluating config files (need to have $nu available in them) - let 
nu_const = create_nu_constant(&engine_state, input.span().unwrap_or_else(Span::unknown))?; - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); perf( "create_nu_constant", start_time, @@ -462,7 +461,8 @@ fn main() -> Result<()> { &commands, input, entire_start_time, - ) + ); + Ok(()) } else if !script_name.is_empty() { run_file( &mut engine_state, @@ -471,7 +471,8 @@ fn main() -> Result<()> { script_name, args_to_script, input, - ) + ); + Ok(()) } else { run_repl(&mut engine_state, parsed_nu_cli_args, entire_start_time) } diff --git a/src/run.rs b/src/run.rs index 81274df3de..2996bc76f8 100644 --- a/src/run.rs +++ b/src/run.rs @@ -8,19 +8,22 @@ use log::trace; #[cfg(feature = "plugin")] use nu_cli::read_plugin_file; use nu_cli::{evaluate_commands, evaluate_file, evaluate_repl}; -use nu_protocol::{eval_const::create_nu_constant, PipelineData, Span, NU_VARIABLE_ID}; +use nu_protocol::{ + engine::{EngineState, Stack}, + report_error_new, PipelineData, Spanned, +}; use nu_utils::utils::perf; pub(crate) fn run_commands( - engine_state: &mut nu_protocol::engine::EngineState, + engine_state: &mut EngineState, parsed_nu_cli_args: command::NushellCliArgs, use_color: bool, - commands: &nu_protocol::Spanned, + commands: &Spanned, input: PipelineData, entire_start_time: std::time::Instant, -) -> Result<(), miette::ErrReport> { +) { trace!("run_commands"); - let mut stack = nu_protocol::engine::Stack::new(); + let mut stack = Stack::new(); let start_time = std::time::Instant::now(); // if the --no-config-file(-n) option is NOT passed, load the plugin file, @@ -103,18 +106,20 @@ pub(crate) fn run_commands( engine_state.set_startup_time(entire_start_time.elapsed().as_nanos() as i64); // Regenerate the $nu constant to contain the startup time and any other potential updates - let nu_const = create_nu_constant(engine_state, commands.span)?; - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); let start_time = std::time::Instant::now(); - let ret_val = evaluate_commands( + if let Err(err) = evaluate_commands( commands, engine_state, &mut stack, input, parsed_nu_cli_args.table_mode, parsed_nu_cli_args.no_newline.is_some(), - ); + ) { + report_error_new(engine_state, &err); + std::process::exit(1); + } perf( "evaluate_commands", start_time, @@ -123,24 +128,18 @@ pub(crate) fn run_commands( column!(), use_color, ); - - match ret_val { - Ok(Some(exit_code)) => std::process::exit(exit_code as i32), - Ok(None) => Ok(()), - Err(e) => Err(e), - } } pub(crate) fn run_file( - engine_state: &mut nu_protocol::engine::EngineState, + engine_state: &mut EngineState, parsed_nu_cli_args: command::NushellCliArgs, use_color: bool, script_name: String, args_to_script: Vec, input: PipelineData, -) -> Result<(), miette::ErrReport> { +) { trace!("run_file"); - let mut stack = nu_protocol::engine::Stack::new(); + let mut stack = Stack::new(); // if the --no-config-file(-n) option is NOT passed, load the plugin file, // load the default env file or custom (depending on parsed_nu_cli_args.env_file), @@ -201,17 +200,19 @@ pub(crate) fn run_file( } // Regenerate the $nu constant to contain the startup time and any other potential updates - let nu_const = create_nu_constant(engine_state, input.span().unwrap_or_else(Span::unknown))?; - engine_state.set_variable_const_val(NU_VARIABLE_ID, nu_const); + engine_state.generate_nu_constant(); let start_time = std::time::Instant::now(); - let ret_val = evaluate_file( + if let Err(err) = 
evaluate_file( script_name, &args_to_script, engine_state, &mut stack, input, - ); + ) { + report_error_new(engine_state, &err); + std::process::exit(1); + } perf( "evaluate_file", start_time, @@ -239,17 +240,15 @@ pub(crate) fn run_file( column!(), use_color, ); - - ret_val } pub(crate) fn run_repl( - engine_state: &mut nu_protocol::engine::EngineState, + engine_state: &mut EngineState, parsed_nu_cli_args: command::NushellCliArgs, entire_start_time: std::time::Instant, ) -> Result<(), miette::ErrReport> { trace!("run_repl"); - let mut stack = nu_protocol::engine::Stack::new(); + let mut stack = Stack::new(); let start_time = std::time::Instant::now(); if parsed_nu_cli_args.no_config_file.is_none() { From 70c01bbb2617f5cd0cdeaeb66c691179a224b8aa Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 9 May 2024 23:50:31 +0000 Subject: [PATCH 015/137] Fix raw strings as external argument (#12817) # Description As discovered by @YizhePKU in a [comment](https://github.com/nushell/nushell/pull/9956#issuecomment-2103123797) in #9956, raw strings are not parsed properly when they are used as an argument to an external command. This PR fixes that. # Tests + Formatting Added a test. --- crates/nu-parser/src/parser.rs | 2 ++ crates/nu-parser/tests/test_parser.rs | 24 ++++++++++++++++++++++- tests/shell/pipeline/commands/external.rs | 7 +++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 7dfccf7bad..3416e538df 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -228,6 +228,8 @@ fn parse_external_arg(working_set: &mut StateWorkingSet, span: Span) -> External ExternalArgument::Regular(parse_dollar_expr(working_set, span)) } else if contents.starts_with(b"[") { ExternalArgument::Regular(parse_list_expression(working_set, span, &SyntaxShape::Any)) + } else if contents.starts_with(b"r#") { + ExternalArgument::Regular(parse_raw_string(working_set, span)) } else if contents.len() > 3 && contents.starts_with(b"...") && (contents[3] == b'$' || contents[3] == b'[' || contents[3] == b'(') diff --git a/crates/nu-parser/tests/test_parser.rs b/crates/nu-parser/tests/test_parser.rs index e73f0f2e02..9b8ae94317 100644 --- a/crates/nu-parser/tests/test_parser.rs +++ b/crates/nu-parser/tests/test_parser.rs @@ -1,6 +1,6 @@ use nu_parser::*; use nu_protocol::{ - ast::{Argument, Call, Expr, PathMember, Range}, + ast::{Argument, Call, Expr, ExternalArgument, PathMember, Range}, engine::{Command, EngineState, Stack, StateWorkingSet}, ParseError, PipelineData, ShellError, Signature, Span, SyntaxShape, }; @@ -926,6 +926,28 @@ mod string { assert!(working_set.parse_errors.is_empty()); } } + + #[test] + fn parse_raw_string_as_external_argument() { + let engine_state = EngineState::new(); + let mut working_set = StateWorkingSet::new(&engine_state); + + let block = parse(&mut working_set, None, b"^echo r#'text'#", true); + + assert!(working_set.parse_errors.is_empty()); + assert_eq!(block.len(), 1); + let pipeline = &block.pipelines[0]; + assert_eq!(pipeline.len(), 1); + let element = &pipeline.elements[0]; + assert!(element.redirection.is_none()); + if let Expr::ExternalCall(_, args) = &element.expr.expr { + if let [ExternalArgument::Regular(expr)] = args.as_ref() { + assert_eq!(expr.expr, Expr::RawString("text".into())); + return; + } + } + panic!("wrong expression: {:?}", element.expr.expr) + } } #[rstest] diff --git a/tests/shell/pipeline/commands/external.rs b/tests/shell/pipeline/commands/external.rs index 
88d443e4fd..3cbed5a124 100644 --- a/tests/shell/pipeline/commands/external.rs +++ b/tests/shell/pipeline/commands/external.rs @@ -314,6 +314,7 @@ mod external_words { use super::nu; use nu_test_support::fs::Stub::FileWithContent; use nu_test_support::{pipeline, playground::Playground}; + #[test] fn relaxed_external_words() { let actual = nu!(" @@ -323,6 +324,12 @@ mod external_words { assert_eq!(actual.out, "joturner@foo.bar.baz"); } + #[test] + fn raw_string_as_external_argument() { + let actual = nu!("nu --testbin cococo r#'asdf'#"); + assert_eq!(actual.out, "asdf"); + } + //FIXME: jt: limitation in testing - can't use single ticks currently #[ignore] #[test] From b9a7faad5adb59e1a9fae444586caa27f9e4058a Mon Sep 17 00:00:00 2001 From: YizhePKU Date: Sat, 11 May 2024 00:06:33 +0800 Subject: [PATCH 016/137] Implement PWD recovery (#12779) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR has two parts. The first part is the addition of the `Stack::set_pwd()` API. It strips trailing slashes from paths for convenience, but will reject otherwise bad paths, leaving PWD in a good state. This should reduce the impact of faulty code incorrectly trying to set PWD. (https://github.com/nushell/nushell/pull/12760#issuecomment-2095393012) The second part is implementing a PWD recovery mechanism. PWD can become bad even when we did nothing wrong. For example, Unix allows you to remove any directory when another process might still be using it, which means PWD can just "disappear" under our nose. This PR makes it possible to use `cd` to reset PWD into a good state. Here's a demonstration: ```sh mkdir /tmp/foo cd /tmp/foo # delete "/tmp/foo" in a subshell, because Nushell is smart and refuse to delete PWD nu -c 'cd /; rm -r /tmp/foo' ls # Error: × $env.PWD points to a non-existent directory # help: Use `cd` to reset $env.PWD into a good state cd / pwd # prints / ``` Also, auto-cd should be working again. 
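A smaller, illustrative sketch of the `Stack::set_pwd()` behavior described above (paths are only examples): trailing slashes are accepted and stripped before PWD is stored, while targets that are not existing directories are rejected and leave PWD untouched.

```nu
mkdir /tmp/foo
cd /tmp/foo/      # trailing slash is accepted...
$env.PWD          # ...but PWD is stored without it: /tmp/foo
cd /tmp/missing   # rejected: not an existing directory, so PWD is unchanged
```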
--- Cargo.lock | 1 + crates/nu-cli/Cargo.toml | 1 + crates/nu-cli/src/repl.rs | 140 +++++++++++++++++- crates/nu-cmd-base/src/util.rs | 9 +- crates/nu-command/src/filesystem/cd.rs | 27 ++-- crates/nu-command/tests/commands/cd.rs | 14 ++ crates/nu-engine/src/eval.rs | 2 +- crates/nu-protocol/src/engine/engine_state.rs | 19 ++- crates/nu-protocol/src/engine/stack.rs | 34 +++++ 9 files changed, 214 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6dcf38f7cb..f55ed5ceb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2852,6 +2852,7 @@ dependencies = [ "reedline", "rstest", "sysinfo", + "tempfile", "unicode-segmentation", "uuid", "which", diff --git a/crates/nu-cli/Cargo.toml b/crates/nu-cli/Cargo.toml index 3423450cf3..e631f7ccf6 100644 --- a/crates/nu-cli/Cargo.toml +++ b/crates/nu-cli/Cargo.toml @@ -15,6 +15,7 @@ nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.93.1" } nu-command = { path = "../nu-command", version = "0.93.1" } nu-test-support = { path = "../nu-test-support", version = "0.93.1" } rstest = { workspace = true, default-features = false } +tempfile = { workspace = true } [dependencies] nu-cmd-base = { path = "../nu-cmd-base", version = "0.93.1" } diff --git a/crates/nu-cli/src/repl.rs b/crates/nu-cli/src/repl.rs index 3fe51db496..2482a7920e 100644 --- a/crates/nu-cli/src/repl.rs +++ b/crates/nu-cli/src/repl.rs @@ -870,7 +870,7 @@ fn parse_operation( let tokens = lex(s.as_bytes(), 0, &[], &[], false); // Check if this is a single call to a directory, if so auto-cd #[allow(deprecated)] - let cwd = nu_engine::env::current_dir_str(engine_state, stack)?; + let cwd = nu_engine::env::current_dir_str(engine_state, stack).unwrap_or_default(); let mut orig = s.clone(); if orig.starts_with('`') { orig = trim_quotes_str(&orig).to_string() @@ -927,7 +927,10 @@ fn do_auto_cd( //FIXME: this only changes the current scope, but instead this environment variable //should probably be a block that loads the information from the state in the overlay - stack.add_env_var("PWD".into(), Value::string(path.clone(), Span::unknown())); + if let Err(err) = stack.set_cwd(&path) { + report_error_new(engine_state, &err); + return; + }; let cwd = Value::string(cwd, span); let shells = stack.get_env_var(engine_state, "NUSHELL_SHELLS"); @@ -1479,3 +1482,136 @@ fn are_session_ids_in_sync() { engine_state.history_session_id ); } + +#[cfg(test)] +mod test_auto_cd { + use super::{do_auto_cd, parse_operation, ReplOperation}; + use nu_protocol::engine::{EngineState, Stack}; + use std::path::Path; + use tempfile::tempdir; + + /// Create a symlink. Works on both Unix and Windows. + #[cfg(any(unix, windows))] + fn symlink(original: impl AsRef, link: impl AsRef) -> std::io::Result<()> { + #[cfg(unix)] + { + std::os::unix::fs::symlink(original, link) + } + #[cfg(windows)] + { + if original.as_ref().is_dir() { + std::os::windows::fs::symlink_dir(original, link) + } else { + std::os::windows::fs::symlink_file(original, link) + } + } + } + + /// Run one test case on the auto-cd feature. PWD is initially set to + /// `before`, and after `input` is parsed and evaluated, PWD should be + /// changed to `after`. + #[track_caller] + fn check(before: impl AsRef, input: &str, after: impl AsRef) { + // Setup EngineState and Stack. + let mut engine_state = EngineState::new(); + let mut stack = Stack::new(); + stack.set_cwd(before).unwrap(); + + // Parse the input. It must be an auto-cd operation. 
+ let op = parse_operation(input.to_string(), &engine_state, &stack).unwrap(); + let ReplOperation::AutoCd { cwd, target, span } = op else { + panic!("'{}' was not parsed into an auto-cd operation", input) + }; + + // Perform the auto-cd operation. + do_auto_cd(target, cwd, &mut stack, &mut engine_state, span); + let updated_cwd = engine_state.cwd(Some(&stack)).unwrap(); + + // Check that `updated_cwd` and `after` point to the same place. They + // don't have to be byte-wise equal (on Windows, the 8.3 filename + // conversion messes things up), + let updated_cwd = std::fs::canonicalize(updated_cwd).unwrap(); + let after = std::fs::canonicalize(after).unwrap(); + assert_eq!(updated_cwd, after); + } + + #[test] + fn auto_cd_root() { + let tempdir = tempdir().unwrap(); + let root = if cfg!(windows) { r"C:\" } else { "/" }; + check(&tempdir, root, root); + } + + #[test] + fn auto_cd_tilde() { + let tempdir = tempdir().unwrap(); + let home = nu_path::home_dir().unwrap(); + check(&tempdir, "~", home); + } + + #[test] + fn auto_cd_dot() { + let tempdir = tempdir().unwrap(); + check(&tempdir, ".", &tempdir); + } + + #[test] + fn auto_cd_double_dot() { + let tempdir = tempdir().unwrap(); + let dir = tempdir.path().join("foo"); + std::fs::create_dir_all(&dir).unwrap(); + check(dir, "..", &tempdir); + } + + #[test] + fn auto_cd_triple_dot() { + let tempdir = tempdir().unwrap(); + let dir = tempdir.path().join("foo").join("bar"); + std::fs::create_dir_all(&dir).unwrap(); + check(dir, "...", &tempdir); + } + + #[test] + fn auto_cd_relative() { + let tempdir = tempdir().unwrap(); + let foo = tempdir.path().join("foo"); + let bar = tempdir.path().join("bar"); + std::fs::create_dir_all(&foo).unwrap(); + std::fs::create_dir_all(&bar).unwrap(); + + let input = if cfg!(windows) { r"..\bar" } else { "../bar" }; + check(foo, input, bar); + } + + #[test] + fn auto_cd_trailing_slash() { + let tempdir = tempdir().unwrap(); + let dir = tempdir.path().join("foo"); + std::fs::create_dir_all(&dir).unwrap(); + + let input = if cfg!(windows) { r"foo\" } else { "foo/" }; + check(&tempdir, input, dir); + } + + #[test] + fn auto_cd_symlink() { + let tempdir = tempdir().unwrap(); + let dir = tempdir.path().join("foo"); + std::fs::create_dir_all(&dir).unwrap(); + let link = tempdir.path().join("link"); + symlink(&dir, &link).unwrap(); + + let input = if cfg!(windows) { r".\link" } else { "./link" }; + check(&tempdir, input, link); + } + + #[test] + #[should_panic(expected = "was not parsed into an auto-cd operation")] + fn auto_cd_nonexistent_directory() { + let tempdir = tempdir().unwrap(); + let dir = tempdir.path().join("foo"); + + let input = if cfg!(windows) { r"foo\" } else { "foo/" }; + check(&tempdir, input, dir); + } +} diff --git a/crates/nu-cmd-base/src/util.rs b/crates/nu-cmd-base/src/util.rs index 4d19fdb3c6..9c63dec836 100644 --- a/crates/nu-cmd-base/src/util.rs +++ b/crates/nu-cmd-base/src/util.rs @@ -1,6 +1,6 @@ use nu_protocol::{ engine::{EngineState, Stack}, - report_error_new, Range, ShellError, Span, Value, + Range, ShellError, Span, Value, }; use std::{ops::Bound, path::PathBuf}; @@ -13,10 +13,9 @@ pub fn get_init_cwd() -> PathBuf { } pub fn get_guaranteed_cwd(engine_state: &EngineState, stack: &Stack) -> PathBuf { - engine_state.cwd(Some(stack)).unwrap_or_else(|e| { - report_error_new(engine_state, &e); - crate::util::get_init_cwd() - }) + engine_state + .cwd(Some(stack)) + .unwrap_or(crate::util::get_init_cwd()) } type MakeRangeError = fn(&str, Span) -> ShellError; diff --git 
a/crates/nu-command/src/filesystem/cd.rs b/crates/nu-command/src/filesystem/cd.rs index f90e9bfd2a..57fe1c17e3 100644 --- a/crates/nu-command/src/filesystem/cd.rs +++ b/crates/nu-command/src/filesystem/cd.rs @@ -1,3 +1,4 @@ +use nu_cmd_base::util::get_init_cwd; use nu_engine::command_prelude::*; use nu_utils::filesystem::{have_permission, PermissionResult}; @@ -39,7 +40,10 @@ impl Command for Cd { ) -> Result { let physical = call.has_flag(engine_state, stack, "physical")?; let path_val: Option> = call.opt(engine_state, stack, 0)?; - let cwd = engine_state.cwd(Some(stack))?; + + // If getting PWD failed, default to the initial directory. This way, the + // user can use `cd` to recover PWD to a good state. + let cwd = engine_state.cwd(Some(stack)).unwrap_or(get_init_cwd()); let path_val = { if let Some(path) = path_val { @@ -52,13 +56,13 @@ impl Command for Cd { } }; - let (path, span) = match path_val { + let path = match path_val { Some(v) => { if v.item == "-" { if let Some(oldpwd) = stack.get_env_var(engine_state, "OLDPWD") { - (oldpwd.to_path()?, v.span) + oldpwd.to_path()? } else { - (cwd, v.span) + cwd } } else { // Trim whitespace from the end of path. @@ -66,7 +70,7 @@ impl Command for Cd { &v.item.trim_end_matches(|x| matches!(x, '\x09'..='\x0d')); // If `--physical` is specified, canonicalize the path; otherwise expand the path. - let path = if physical { + if physical { if let Ok(path) = nu_path::canonicalize_with(path_no_whitespace, &cwd) { if !path.is_dir() { return Err(ShellError::NotADirectory { span: v.span }); @@ -90,19 +94,12 @@ impl Command for Cd { return Err(ShellError::NotADirectory { span: v.span }); }; path - }; - (path, v.span) + } } } - None => { - let path = nu_path::expand_tilde("~"); - (path, call.head) - } + None => nu_path::expand_tilde("~"), }; - // Strip the trailing slash from the new path. This is required for PWD. - let path = nu_path::strip_trailing_slash(&path); - // Set OLDPWD. // We're using `Stack::get_env_var()` instead of `EngineState::cwd()` to avoid a conversion roundtrip. if let Some(oldpwd) = stack.get_env_var(engine_state, "PWD") { @@ -113,7 +110,7 @@ impl Command for Cd { //FIXME: this only changes the current scope, but instead this environment variable //should probably be a block that loads the information from the state in the overlay PermissionResult::PermissionOk => { - stack.add_env_var("PWD".into(), Value::string(path.to_string_lossy(), span)); + stack.set_cwd(path)?; Ok(PipelineData::empty()) } PermissionResult::PermissionDenied(reason) => Err(ShellError::IOError { diff --git a/crates/nu-command/tests/commands/cd.rs b/crates/nu-command/tests/commands/cd.rs index c7c30528e8..87af52aa4d 100644 --- a/crates/nu-command/tests/commands/cd.rs +++ b/crates/nu-command/tests/commands/cd.rs @@ -312,3 +312,17 @@ fn cd_permission_denied_folder() { assert!(actual.err.contains("Folder is not able to read")); }); } + +#[test] +#[cfg(unix)] +fn pwd_recovery() { + let nu = nu_test_support::fs::executable_path().display().to_string(); + let tmpdir = std::env::temp_dir().join("foobar").display().to_string(); + + // We `cd` into a temporary directory, then spawn another `nu` process to + // delete that directory. Then we attempt to recover by running `cd /`. 
+ let cmd = format!("mkdir {tmpdir}; cd {tmpdir}; {nu} -c 'cd /; rm -r {tmpdir}'; cd /; pwd"); + let actual = nu!(cmd); + + assert_eq!(actual.out, "/"); +} diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index 6324b35ae7..8b4333dc55 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -654,7 +654,7 @@ impl Eval for EvalRuntime { } else if quoted { Ok(Value::string(path, span)) } else { - let cwd = engine_state.cwd(Some(stack))?; + let cwd = engine_state.cwd(Some(stack)).unwrap_or_default(); let path = expand_path_with(path, cwd, true); Ok(Value::string(path.to_string_lossy(), span)) diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 4a0ff4c4ae..bea49b5d6c 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -931,13 +931,12 @@ impl EngineState { /// directory on the stack that have yet to be merged into the engine state. pub fn cwd(&self, stack: Option<&Stack>) -> Result { // Helper function to create a simple generic error. - // Its messages are not especially helpful, but these errors don't occur often, so it's probably fine. - fn error(msg: &str) -> Result { + fn error(msg: &str, cwd: impl AsRef) -> Result { Err(ShellError::GenericError { error: msg.into(), - msg: "".into(), + msg: format!("$env.PWD = {}", cwd.as_ref().display()), span: None, - help: None, + help: Some("Use `cd` to reset $env.PWD into a good state".into()), inner: vec![], }) } @@ -967,21 +966,21 @@ impl EngineState { // Technically, a root path counts as "having trailing slashes", but // for the purpose of PWD, a root path is acceptable. if !is_root(&path) && has_trailing_slash(&path) { - error("$env.PWD contains trailing slashes") + error("$env.PWD contains trailing slashes", path) } else if !path.is_absolute() { - error("$env.PWD is not an absolute path") + error("$env.PWD is not an absolute path", path) } else if !path.exists() { - error("$env.PWD points to a non-existent directory") + error("$env.PWD points to a non-existent directory", path) } else if !path.is_dir() { - error("$env.PWD points to a non-directory") + error("$env.PWD points to a non-directory", path) } else { Ok(path) } } else { - error("$env.PWD is not a string") + error("$env.PWD is not a string", format!("{pwd:?}")) } } else { - error("$env.PWD not found") + error("$env.PWD not found", "") } } diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index 2fa71f57fa..b577d55270 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -592,6 +592,40 @@ impl Stack { self.out_dest.pipe_stderr = None; self } + + /// Set the PWD environment variable to `path`. + /// + /// This method accepts `path` with trailing slashes, but they're removed + /// before writing the value into PWD. + pub fn set_cwd(&mut self, path: impl AsRef) -> Result<(), ShellError> { + // Helper function to create a simple generic error. + // Its messages are not especially helpful, but these errors don't occur often, so it's probably fine. 
+ fn error(msg: &str) -> Result<(), ShellError> { + Err(ShellError::GenericError { + error: msg.into(), + msg: "".into(), + span: None, + help: None, + inner: vec![], + }) + } + + let path = path.as_ref(); + + if !path.is_absolute() { + error("Cannot set $env.PWD to a non-absolute path") + } else if !path.exists() { + error("Cannot set $env.PWD to a non-existent directory") + } else if !path.is_dir() { + error("Cannot set $env.PWD to a non-directory") + } else { + // Strip trailing slashes, if any. + let path = nu_path::strip_trailing_slash(path); + let value = Value::string(path.to_string_lossy(), Span::unknown()); + self.add_env_var("PWD".into(), value); + Ok(()) + } + } } #[cfg(test)] From cab86f49c0fb48385dc9d3d5415e18b7b8a1d8d0 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Sat, 11 May 2024 15:32:00 +0000 Subject: [PATCH 017/137] Fix pipe redirection into `complete` (#12818) # Description Fixes #12796 where a combined out and err pipe redirection (`o+e>|`) into `complete` still provides separate `stdout` and `stderr` columns in the record. Now, the combined output will be in the `stdout` column. This PR also fixes a similar error with the `e>|` pipe redirection. # Tests + Formatting Added two tests. --- crates/nu-command/tests/commands/complete.rs | 13 +++++++++++++ crates/nu-engine/src/eval.rs | 17 ++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/crates/nu-command/tests/commands/complete.rs b/crates/nu-command/tests/commands/complete.rs index cc398d4c8c..a5a8b6a256 100644 --- a/crates/nu-command/tests/commands/complete.rs +++ b/crates/nu-command/tests/commands/complete.rs @@ -92,3 +92,16 @@ fn capture_error_with_both_stdout_stderr_messages_not_hang_nushell() { }, ) } + +#[test] +fn combined_pipe_redirection() { + let actual = nu!("$env.FOO = hello; $env.BAR = world; nu --testbin echo_env_mixed out-err FOO BAR o+e>| complete | get stdout"); + assert_eq!(actual.out, "helloworld"); +} + +#[test] +fn err_pipe_redirection() { + let actual = + nu!("$env.FOO = hello; nu --testbin echo_env_stderr FOO e>| complete | get stdout"); + assert_eq!(actual.out, "hello"); +} diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index 8b4333dc55..b8d806b708 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -340,7 +340,13 @@ fn eval_redirection( } Ok(Redirection::file(options.create(true).open(path)?)) } - RedirectionTarget::Pipe { .. } => Ok(Redirection::Pipe(next_out.unwrap_or(OutDest::Pipe))), + RedirectionTarget::Pipe { .. 
} => { + let dest = match next_out { + None | Some(OutDest::Capture) => OutDest::Pipe, + Some(next) => next, + }; + Ok(Redirection::Pipe(dest)) + } } } @@ -367,11 +373,12 @@ fn eval_element_redirection( } => { let stderr = eval_redirection::(engine_state, stack, target, None)?; if matches!(stderr, Redirection::Pipe(OutDest::Pipe)) { + let dest = match next_out { + None | Some(OutDest::Capture) => OutDest::Pipe, + Some(next) => next, + }; // e>| redirection, don't override current stack `stdout` - Ok(( - None, - Some(next_out.map(Redirection::Pipe).unwrap_or(stderr)), - )) + Ok((None, Some(Redirection::Pipe(dest)))) } else { Ok((next_out.map(Redirection::Pipe), Some(stderr))) } From 075535f86981d40dfa99836a800833606db66cbc Mon Sep 17 00:00:00 2001 From: Brage Ingebrigtsen <84551201+Skyppex@users.noreply.github.com> Date: Sun, 12 May 2024 01:13:36 +0200 Subject: [PATCH 018/137] remove --not flag for 'str contains' (#12837) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR resolves an inconsistency between different `str` subcommands, notably `str contains`, `str starts-with` and `str ends-with`. Only the `str contains` command has the `--not` flag and a desicion was made in this #12781 PR to remove the `--not` flag and use the `not` operator instead. Before: `"blob" | str contains --not o` After: `not ("blob" | str contains o)` OR `"blob" | str contains o | not $in` > Note, you can currently do all three, but the first will be broken after this PR is merged. # User-Facing Changes - remove `--not(-n)` flag from `str contains` command - This is a breaking change! # Tests + Formatting - [x] Added tests - [x] Ran `cargo fmt --all` - [x] Ran `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` - [x] Ran `cargo test --workspace` - [ ] Ran `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` - I was unable to get this working. ``` Error: nu::parser::export_not_found × Export not found. ╭─[source:1:9] 1 │ use std testing; testing run-tests --path crates/nu-std · ───┬─── · ╰── could not find imports ╰──── ``` ^ I still can't figure out how to make this work 😂 # After Submitting Requires update of documentation --- .../nu-command/src/strings/str_/contains.rs | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/crates/nu-command/src/strings/str_/contains.rs b/crates/nu-command/src/strings/str_/contains.rs index abb0ce1a2b..bc1d5bcf11 100644 --- a/crates/nu-command/src/strings/str_/contains.rs +++ b/crates/nu-command/src/strings/str_/contains.rs @@ -40,7 +40,7 @@ impl Command for SubCommand { "For a data structure input, check strings at the given cell paths, and replace with result.", ) .switch("ignore-case", "search is case insensitive", Some('i')) - .switch("not", "does not contain", Some('n')) + .switch("not", "DEPRECATED OPTION: does not contain", Some('n')) .category(Category::Strings) } @@ -59,6 +59,20 @@ impl Command for SubCommand { call: &Call, input: PipelineData, ) -> Result { + if call.has_flag(engine_state, stack, "not")? { + nu_protocol::report_error_new( + engine_state, + &ShellError::GenericError { + error: "Deprecated option".into(), + msg: "`str contains --not {string}` is deprecated and will be removed in 0.95." 
+ .into(), + span: Some(call.head), + help: Some("Please use the `not` operator instead.".into()), + inner: vec![], + }, + ); + } + let cell_paths: Vec = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); let args = Arguments { @@ -120,15 +134,6 @@ impl Command for SubCommand { Value::test_bool(false), ])), }, - Example { - description: "Check if list does not contain string", - example: "[one two three] | str contains --not o", - result: Some(Value::test_list(vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - ])), - }, ] } } From 30fc83203508c1c2470a2f6ad188e90f7efbbcb8 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Sun, 12 May 2024 11:19:28 +0000 Subject: [PATCH 019/137] Fix custom converters with `save` (#12833) # Description Fixes #10429 where `save` fails if a custom command is used as the file format converter. # Tests + Formatting Added a test. --- crates/nu-command/src/filesystem/save.rs | 41 +++++++++++------------- crates/nu-command/tests/commands/save.rs | 17 ++++++++++ 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index 3b92416ab5..0826284798 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -1,4 +1,5 @@ use crate::progress_bar; +use nu_engine::get_eval_block; #[allow(deprecated)] use nu_engine::{command_prelude::*, current_dir}; use nu_path::expand_path_with; @@ -311,12 +312,13 @@ fn input_to_bytes( .map(|name| name.to_string_lossy().to_string()) }; - if let Some(ext) = ext { - convert_to_extension(engine_state, &ext, stack, input, span) + let input = if let Some(ext) = ext { + convert_to_extension(engine_state, &ext, stack, input, span)? 
} else { - let value = input.into_value(span); - value_to_bytes(value) - } + input + }; + + value_to_bytes(input.into_value(span)) } /// Convert given data into content of file of specified extension if @@ -328,24 +330,19 @@ fn convert_to_extension( stack: &mut Stack, input: PipelineData, span: Span, -) -> Result, ShellError> { - let converter = engine_state.find_decl(format!("to {extension}").as_bytes(), &[]); - - let output = match converter { - Some(converter_id) => { - let output = engine_state.get_decl(converter_id).run( - engine_state, - stack, - &Call::new(span), - input, - )?; - - output.into_value(span) +) -> Result { + if let Some(decl_id) = engine_state.find_decl(format!("to {extension}").as_bytes(), &[]) { + let decl = engine_state.get_decl(decl_id); + if let Some(block_id) = decl.get_block_id() { + let block = engine_state.get_block(block_id); + let eval_block = get_eval_block(engine_state); + eval_block(engine_state, stack, block, input) + } else { + decl.run(engine_state, stack, &Call::new(span), input) } - None => input.into_value(span), - }; - - value_to_bytes(output) + } else { + Ok(input) + } } /// Convert [`Value::String`] [`Value::Binary`] or [`Value::List`] into [`Vec`] of bytes diff --git a/crates/nu-command/tests/commands/save.rs b/crates/nu-command/tests/commands/save.rs index b5776a7bb9..ef0304dc7c 100644 --- a/crates/nu-command/tests/commands/save.rs +++ b/crates/nu-command/tests/commands/save.rs @@ -407,3 +407,20 @@ fn save_same_file_without_extension_pipeline() { .contains("pipeline input and output are the same file")); }) } + +#[test] +fn save_with_custom_converter() { + Playground::setup("save_with_custom_converter", |dirs, _| { + let file = dirs.test().join("test.ndjson"); + + nu!(cwd: dirs.test(), pipeline( + r#" + def "to ndjson" []: any -> string { each { to json --raw } | to text } ; + {a: 1, b: 2} | save test.ndjson + "# + )); + + let actual = file_contents(file); + assert_eq!(actual, r#"{"a":1,"b":2}"#); + }) +} From c70c43aae94c674162ded5880fc549648b661e54 Mon Sep 17 00:00:00 2001 From: NotTheDr01ds <32344964+NotTheDr01ds@users.noreply.github.com> Date: Sun, 12 May 2024 21:55:07 -0400 Subject: [PATCH 020/137] Add example and search term for 'repeat' to the `fill` command (#12844) # Description It's commonly forgotten or overlooked that a lot of `std repeat` functionality can be handled with the built-in `fill`. Added 'repeat` as a search term for `fill` to improve discoverability. Also replaced one of the existing examples with one `fill`ing an empty string, a la `repeat`. There were 6 examples already, and 3 of them pretty much were variations on the same theme, so I repurposed one of those rather than adding a 7th. # User-Facing Changes Changes to `help` only # Tests + Formatting - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` # After Submitting I assume the "Commands" doc is auto-generated from the `help`, but I'll double-check that assumption. 
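For quick reference, the `std repeat`-style usage that `fill` already covers — this mirrors the example added in this patch:

```nu
# Repeat a character by filling an empty string
'' | fill --character '─' --width 10
# => ──────────
```

For non-empty input the same flags pad the string out to `--width`, which handles most simple repeat/pad use cases without reaching for the standard library.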
--- crates/nu-command/src/conversions/fill.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/nu-command/src/conversions/fill.rs b/crates/nu-command/src/conversions/fill.rs index 76ec1e81e5..6507e4a368 100644 --- a/crates/nu-command/src/conversions/fill.rs +++ b/crates/nu-command/src/conversions/fill.rs @@ -73,7 +73,7 @@ impl Command for Fill { } fn search_terms(&self) -> Vec<&str> { - vec!["display", "render", "format", "pad", "align"] + vec!["display", "render", "format", "pad", "align", "repeat"] } fn examples(&self) -> Vec { @@ -91,9 +91,9 @@ impl Command for Fill { result: Some(Value::string("────────nushell", Span::test_data())), }, Example { - description: "Fill a string on both sides to a width of 15 with the character '─'", - example: "'nushell' | fill --alignment m --character '─' --width 15", - result: Some(Value::string("────nushell────", Span::test_data())), + description: "Fill an empty string with 10 '─' characters", + example: "'' | fill --character '─' --width 10", + result: Some(Value::string("──────────", Span::test_data())), }, Example { description: From c4dca5fe03c0a33de0184564a62f37093b1c55d3 Mon Sep 17 00:00:00 2001 From: francesco-gaglione <94604837+francesco-gaglione@users.noreply.github.com> Date: Mon, 13 May 2024 15:37:53 +0200 Subject: [PATCH 021/137] Merged tests to produce a single binary (#12826) This PR should close #7147 # Description Merged src/tests into /tests to produce a single binary. ![image](https://github.com/nushell/nushell/assets/94604837/84726469-d447-4619-b6d1-2d1415d0f42e) # User-Facing Changes No user facing changes # Tests + Formatting Moved tests. Tollkit check pr pass. # After Submitting --------- Co-authored-by: Ian Manske --- CONTRIBUTING.md | 1 - src/main.rs | 2 -- tests/main.rs | 1 + tests/repl/mod.rs | 27 +++++++++++++++++++ {src/tests => tests/repl}/test_bits.rs | 2 +- {src/tests => tests/repl}/test_cell_path.rs | 2 +- {src/tests => tests/repl}/test_commandline.rs | 2 +- .../tests => tests/repl}/test_conditionals.rs | 2 +- {src/tests => tests/repl}/test_config.rs | 3 +-- {src/tests => tests/repl}/test_config_path.rs | 0 {src/tests => tests/repl}/test_converters.rs | 2 +- .../repl}/test_custom_commands.rs | 2 +- {src/tests => tests/repl}/test_engine.rs | 2 +- {src/tests => tests/repl}/test_env.rs | 2 +- {src/tests => tests/repl}/test_help.rs | 2 +- {src/tests => tests/repl}/test_hiding.rs | 2 +- {src/tests => tests/repl}/test_ide.rs | 2 +- {src/tests => tests/repl}/test_iteration.rs | 2 +- .../repl}/test_known_external.rs | 3 +-- {src/tests => tests/repl}/test_math.rs | 2 +- {src/tests => tests/repl}/test_modules.rs | 2 +- {src/tests => tests/repl}/test_parser.rs | 4 +-- {src/tests => tests/repl}/test_ranges.rs | 2 +- {src/tests => tests/repl}/test_regex.rs | 2 +- {src/tests => tests/repl}/test_signatures.rs | 2 +- {src/tests => tests/repl}/test_spread.rs | 2 +- {src/tests => tests/repl}/test_stdlib.rs | 2 +- {src/tests => tests/repl}/test_strings.rs | 2 +- .../repl}/test_table_operations.rs | 2 +- {src/tests => tests/repl}/test_type_check.rs | 2 +- {src => tests/repl}/tests.rs | 27 ------------------- 31 files changed, 53 insertions(+), 59 deletions(-) create mode 100644 tests/repl/mod.rs rename {src/tests => tests/repl}/test_bits.rs (97%) rename {src/tests => tests/repl}/test_cell_path.rs (98%) rename {src/tests => tests/repl}/test_commandline.rs (98%) rename {src/tests => tests/repl}/test_conditionals.rs (96%) rename {src/tests => tests/repl}/test_config.rs (98%) rename {src/tests => 
tests/repl}/test_config_path.rs (100%) rename {src/tests => tests/repl}/test_converters.rs (96%) rename {src/tests => tests/repl}/test_custom_commands.rs (98%) rename {src/tests => tests/repl}/test_engine.rs (99%) rename {src/tests => tests/repl}/test_env.rs (91%) rename {src/tests => tests/repl}/test_help.rs (94%) rename {src/tests => tests/repl}/test_hiding.rs (99%) rename {src/tests => tests/repl}/test_ide.rs (76%) rename {src/tests => tests/repl}/test_iteration.rs (94%) rename {src/tests => tests/repl}/test_known_external.rs (97%) rename {src/tests => tests/repl}/test_math.rs (98%) rename {src/tests => tests/repl}/test_modules.rs (98%) rename {src/tests => tests/repl}/test_parser.rs (99%) rename {src/tests => tests/repl}/test_ranges.rs (92%) rename {src/tests => tests/repl}/test_regex.rs (96%) rename {src/tests => tests/repl}/test_signatures.rs (99%) rename {src/tests => tests/repl}/test_spread.rs (98%) rename {src/tests => tests/repl}/test_stdlib.rs (86%) rename {src/tests => tests/repl}/test_strings.rs (98%) rename {src/tests => tests/repl}/test_table_operations.rs (99%) rename {src/tests => tests/repl}/test_type_check.rs (98%) rename {src => tests/repl}/tests.rs (89%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1ecda698c7..d5cf758b56 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,7 +55,6 @@ It is good practice to cover your changes with a test. Also, try to think about Tests can be found in different places: * `/tests` -* `src/tests` * command examples * crate-specific tests diff --git a/src/main.rs b/src/main.rs index 714b932df8..db0c80d4f2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,8 +7,6 @@ mod signals; #[cfg(unix)] mod terminal; mod test_bins; -#[cfg(test)] -mod tests; #[cfg(feature = "mimalloc")] #[global_allocator] diff --git a/tests/main.rs b/tests/main.rs index db44c4e019..6824f26f13 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -11,5 +11,6 @@ mod path; mod plugin_persistence; #[cfg(feature = "plugin")] mod plugins; +mod repl; mod scope; mod shell; diff --git a/tests/repl/mod.rs b/tests/repl/mod.rs new file mode 100644 index 0000000000..5ed7f29ba5 --- /dev/null +++ b/tests/repl/mod.rs @@ -0,0 +1,27 @@ +mod test_bits; +mod test_cell_path; +mod test_commandline; +mod test_conditionals; +mod test_config; +mod test_config_path; +mod test_converters; +mod test_custom_commands; +mod test_engine; +mod test_env; +mod test_help; +mod test_hiding; +mod test_ide; +mod test_iteration; +mod test_known_external; +mod test_math; +mod test_modules; +mod test_parser; +mod test_ranges; +mod test_regex; +mod test_signatures; +mod test_spread; +mod test_stdlib; +mod test_strings; +mod test_table_operations; +mod test_type_check; +mod tests; diff --git a/src/tests/test_bits.rs b/tests/repl/test_bits.rs similarity index 97% rename from src/tests/test_bits.rs rename to tests/repl/test_bits.rs index fdb672d023..410f48d83b 100644 --- a/src/tests/test_bits.rs +++ b/tests/repl/test_bits.rs @@ -1,4 +1,4 @@ -use crate::tests::{run_test, TestResult}; +use crate::repl::tests::{run_test, TestResult}; #[test] fn bits_and() -> TestResult { diff --git a/src/tests/test_cell_path.rs b/tests/repl/test_cell_path.rs similarity index 98% rename from src/tests/test_cell_path.rs rename to tests/repl/test_cell_path.rs index 56b6751638..c89c50259b 100644 --- a/src/tests/test_cell_path.rs +++ b/tests/repl/test_cell_path.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; // Tests for null / null / 
Value::Nothing #[test] diff --git a/src/tests/test_commandline.rs b/tests/repl/test_commandline.rs similarity index 98% rename from src/tests/test_commandline.rs rename to tests/repl/test_commandline.rs index 130faf933a..0443ee4d6c 100644 --- a/src/tests/test_commandline.rs +++ b/tests/repl/test_commandline.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn commandline_test_get_empty() -> TestResult { diff --git a/src/tests/test_conditionals.rs b/tests/repl/test_conditionals.rs similarity index 96% rename from src/tests/test_conditionals.rs rename to tests/repl/test_conditionals.rs index 20307ffe65..dd1e16c5a8 100644 --- a/src/tests/test_conditionals.rs +++ b/tests/repl/test_conditionals.rs @@ -1,4 +1,4 @@ -use crate::tests::{run_test, TestResult}; +use crate::repl::tests::{run_test, TestResult}; #[test] fn if_test1() -> TestResult { diff --git a/src/tests/test_config.rs b/tests/repl/test_config.rs similarity index 98% rename from src/tests/test_config.rs rename to tests/repl/test_config.rs index 96bd7f714d..005ae63171 100644 --- a/src/tests/test_config.rs +++ b/tests/repl/test_config.rs @@ -1,5 +1,4 @@ -use super::{fail_test, run_test, run_test_std}; -use crate::tests::TestResult; +use crate::repl::tests::{fail_test, run_test, run_test_std, TestResult}; #[test] fn mutate_nu_config() -> TestResult { diff --git a/src/tests/test_config_path.rs b/tests/repl/test_config_path.rs similarity index 100% rename from src/tests/test_config_path.rs rename to tests/repl/test_config_path.rs diff --git a/src/tests/test_converters.rs b/tests/repl/test_converters.rs similarity index 96% rename from src/tests/test_converters.rs rename to tests/repl/test_converters.rs index c41868539e..cfa165e95d 100644 --- a/src/tests/test_converters.rs +++ b/tests/repl/test_converters.rs @@ -1,4 +1,4 @@ -use crate::tests::{run_test, TestResult}; +use crate::repl::tests::{run_test, TestResult}; #[test] fn from_json_1() -> TestResult { diff --git a/src/tests/test_custom_commands.rs b/tests/repl/test_custom_commands.rs similarity index 98% rename from src/tests/test_custom_commands.rs rename to tests/repl/test_custom_commands.rs index 4cc81878e4..43a24fa250 100644 --- a/src/tests/test_custom_commands.rs +++ b/tests/repl/test_custom_commands.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, run_test_contains, TestResult}; +use crate::repl::tests::{fail_test, run_test, run_test_contains, TestResult}; use nu_test_support::nu; use pretty_assertions::assert_eq; diff --git a/src/tests/test_engine.rs b/tests/repl/test_engine.rs similarity index 99% rename from src/tests/test_engine.rs rename to tests/repl/test_engine.rs index 9f5be84763..c1eb919afa 100644 --- a/src/tests/test_engine.rs +++ b/tests/repl/test_engine.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; use rstest::rstest; #[test] diff --git a/src/tests/test_env.rs b/tests/repl/test_env.rs similarity index 91% rename from src/tests/test_env.rs rename to tests/repl/test_env.rs index 159b3d0a20..7963f7580b 100644 --- a/src/tests/test_env.rs +++ b/tests/repl/test_env.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; use nu_test_support::nu; #[test] diff --git a/src/tests/test_help.rs b/tests/repl/test_help.rs similarity index 94% rename from src/tests/test_help.rs rename to tests/repl/test_help.rs index 
b529f7cd19..16d168587a 100644 --- a/src/tests/test_help.rs +++ b/tests/repl/test_help.rs @@ -1,4 +1,4 @@ -use crate::tests::{run_test, TestResult}; +use crate::repl::tests::{run_test, TestResult}; use rstest::rstest; #[rstest] diff --git a/src/tests/test_hiding.rs b/tests/repl/test_hiding.rs similarity index 99% rename from src/tests/test_hiding.rs rename to tests/repl/test_hiding.rs index 81484cc51e..ee2016c54d 100644 --- a/src/tests/test_hiding.rs +++ b/tests/repl/test_hiding.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; // TODO: Test the use/hide tests also as separate lines in REPL (i.e., with merging the delta in between) #[test] diff --git a/src/tests/test_ide.rs b/tests/repl/test_ide.rs similarity index 76% rename from src/tests/test_ide.rs rename to tests/repl/test_ide.rs index b5e40cfe8d..45cc87aa4d 100644 --- a/src/tests/test_ide.rs +++ b/tests/repl/test_ide.rs @@ -1,4 +1,4 @@ -use crate::tests::{test_ide_contains, TestResult}; +use crate::repl::tests::{test_ide_contains, TestResult}; #[test] fn parser_recovers() -> TestResult { diff --git a/src/tests/test_iteration.rs b/tests/repl/test_iteration.rs similarity index 94% rename from src/tests/test_iteration.rs rename to tests/repl/test_iteration.rs index da3eef9ce2..bcc21b9ef9 100644 --- a/src/tests/test_iteration.rs +++ b/tests/repl/test_iteration.rs @@ -1,4 +1,4 @@ -use crate::tests::{run_test, TestResult}; +use crate::repl::tests::{run_test, TestResult}; #[test] fn better_block_types() -> TestResult { diff --git a/src/tests/test_known_external.rs b/tests/repl/test_known_external.rs similarity index 97% rename from src/tests/test_known_external.rs rename to tests/repl/test_known_external.rs index 586ee40cd3..c140f4ee90 100644 --- a/src/tests/test_known_external.rs +++ b/tests/repl/test_known_external.rs @@ -1,7 +1,6 @@ +use crate::repl::tests::{fail_test, run_test, run_test_contains, TestResult}; use std::process::Command; -use crate::tests::{fail_test, run_test, run_test_contains, TestResult}; - // cargo version prints a string of the form: // cargo 1.60.0 (d1fd9fe2c 2022-03-01) diff --git a/src/tests/test_math.rs b/tests/repl/test_math.rs similarity index 98% rename from src/tests/test_math.rs rename to tests/repl/test_math.rs index ed372b198a..af44f8a857 100644 --- a/src/tests/test_math.rs +++ b/tests/repl/test_math.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn add_simple() -> TestResult { diff --git a/src/tests/test_modules.rs b/tests/repl/test_modules.rs similarity index 98% rename from src/tests/test_modules.rs rename to tests/repl/test_modules.rs index 26ca62bf49..dd62c74b77 100644 --- a/src/tests/test_modules.rs +++ b/tests/repl/test_modules.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn module_def_imports_1() -> TestResult { diff --git a/src/tests/test_parser.rs b/tests/repl/test_parser.rs similarity index 99% rename from src/tests/test_parser.rs rename to tests/repl/test_parser.rs index 3d19f05a9c..c0e7279b7b 100644 --- a/src/tests/test_parser.rs +++ b/tests/repl/test_parser.rs @@ -1,9 +1,7 @@ -use crate::tests::{fail_test, run_test, run_test_with_env, TestResult}; +use crate::repl::tests::{fail_test, run_test, run_test_contains, run_test_with_env, TestResult}; use nu_test_support::{nu, nu_repl_code}; use std::collections::HashMap; -use 
super::run_test_contains; - #[test] fn env_shorthand() -> TestResult { run_test("FOO=BAR if false { 3 } else { 4 }", "4") diff --git a/src/tests/test_ranges.rs b/tests/repl/test_ranges.rs similarity index 92% rename from src/tests/test_ranges.rs rename to tests/repl/test_ranges.rs index 3a007196e6..96f59eea84 100644 --- a/src/tests/test_ranges.rs +++ b/tests/repl/test_ranges.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn int_in_inc_range() -> TestResult { diff --git a/src/tests/test_regex.rs b/tests/repl/test_regex.rs similarity index 96% rename from src/tests/test_regex.rs rename to tests/repl/test_regex.rs index adb59cc4a7..6f9063c81a 100644 --- a/src/tests/test_regex.rs +++ b/tests/repl/test_regex.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn contains() -> TestResult { diff --git a/src/tests/test_signatures.rs b/tests/repl/test_signatures.rs similarity index 99% rename from src/tests/test_signatures.rs rename to tests/repl/test_signatures.rs index 1ca395c90f..5e265974d5 100644 --- a/src/tests/test_signatures.rs +++ b/tests/repl/test_signatures.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn list_annotations() -> TestResult { diff --git a/src/tests/test_spread.rs b/tests/repl/test_spread.rs similarity index 98% rename from src/tests/test_spread.rs rename to tests/repl/test_spread.rs index 3dceea7089..419188be33 100644 --- a/src/tests/test_spread.rs +++ b/tests/repl/test_spread.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; use nu_test_support::nu; #[test] diff --git a/src/tests/test_stdlib.rs b/tests/repl/test_stdlib.rs similarity index 86% rename from src/tests/test_stdlib.rs rename to tests/repl/test_stdlib.rs index 3be252ef77..401fd3c0aa 100644 --- a/src/tests/test_stdlib.rs +++ b/tests/repl/test_stdlib.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test_std, TestResult}; +use crate::repl::tests::{fail_test, run_test_std, TestResult}; #[test] fn library_loaded() -> TestResult { diff --git a/src/tests/test_strings.rs b/tests/repl/test_strings.rs similarity index 98% rename from src/tests/test_strings.rs rename to tests/repl/test_strings.rs index 7d2ae1de84..cafd6cc681 100644 --- a/src/tests/test_strings.rs +++ b/tests/repl/test_strings.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn cjk_in_substrings() -> TestResult { diff --git a/src/tests/test_table_operations.rs b/tests/repl/test_table_operations.rs similarity index 99% rename from src/tests/test_table_operations.rs rename to tests/repl/test_table_operations.rs index 9971261190..5d61e0d902 100644 --- a/src/tests/test_table_operations.rs +++ b/tests/repl/test_table_operations.rs @@ -1,4 +1,4 @@ -use crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn illegal_column_duplication() -> TestResult { diff --git a/src/tests/test_type_check.rs b/tests/repl/test_type_check.rs similarity index 98% rename from src/tests/test_type_check.rs rename to tests/repl/test_type_check.rs index ed912f8fff..87216e6672 100644 --- a/src/tests/test_type_check.rs +++ b/tests/repl/test_type_check.rs @@ -1,4 +1,4 @@ -use 
crate::tests::{fail_test, run_test, TestResult}; +use crate::repl::tests::{fail_test, run_test, TestResult}; #[test] fn chained_operator_typecheck() -> TestResult { diff --git a/src/tests.rs b/tests/repl/tests.rs similarity index 89% rename from src/tests.rs rename to tests/repl/tests.rs index 487173b9b1..e3a5e89471 100644 --- a/src/tests.rs +++ b/tests/repl/tests.rs @@ -1,30 +1,3 @@ -mod test_bits; -mod test_cell_path; -mod test_commandline; -mod test_conditionals; -mod test_config; -mod test_config_path; -mod test_converters; -mod test_custom_commands; -mod test_engine; -mod test_env; -mod test_help; -mod test_hiding; -mod test_ide; -mod test_iteration; -mod test_known_external; -mod test_math; -mod test_modules; -mod test_parser; -mod test_ranges; -mod test_regex; -mod test_signatures; -mod test_spread; -mod test_stdlib; -mod test_strings; -mod test_table_operations; -mod test_type_check; - use assert_cmd::prelude::*; use pretty_assertions::assert_eq; use std::collections::HashMap; From 905ec88091a118ba953548ae12fbdbdf4831fd93 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Mon, 13 May 2024 13:45:44 +0000 Subject: [PATCH 022/137] Update PR template (#12838) # Description Updates the command listed in the PR template to test the standard library, following from #11151. --- .github/pull_request_template.md | 2 +- crates/nu-std/testing.nu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 40cda47bbc..23d6a5ff95 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -26,7 +26,7 @@ Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) -- `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` to run the tests for the standard library +- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows diff --git a/crates/nu-std/testing.nu b/crates/nu-std/testing.nu index ddae1d371e..489430e462 100644 --- a/crates/nu-std/testing.nu +++ b/crates/nu-std/testing.nu @@ -287,7 +287,7 @@ export def run-tests [ --list, # list the selected tests without running them. --threads: int@"nu-complete threads", # Amount of threads to use for parallel execution. Default: All threads are utilized ] { - let available_threads = (sys | get cpu | length) + let available_threads = (sys cpu | length) # Can't use pattern matching here due to https://github.com/nushell/nushell/issues/9198 let threads = (if $threads == null { From aaf973bbba9c1478be78fd192ae01dabc701a9ee Mon Sep 17 00:00:00 2001 From: Piepmatz Date: Mon, 13 May 2024 20:48:38 +0200 Subject: [PATCH 023/137] Add Stack::stdout_file and Stack::stderr_file to capture stdout/-err of external commands (#12857) # Description In this PR I added two new methods to `Stack`, `stdout_file` and `stderr_file`. These two modify the inner `StackOutDest` and set a `File` into the `stdout` and `stderr` respectively. 
Different to the `push_redirection` methods, these do not require to hold a guard up all the time but require ownership of the stack. This is primarly useful for applications that use `nu` as a language but not the `nushell`. This PR replaces my first attempt #12851 to add a way to capture stdout/-err of external commands. Capturing the stdout without having to write into a file is possible with crates like [`os_pipe`](https://docs.rs/os_pipe), an example for this is given in the doc comment of the `stdout_file` command and can be executed as a doctest (although it doesn't validate that you actually got any data). This implementation takes `File` as input to make it easier to implement on different operating systems without having to worry about `OwnedHandle` or `OwnedFd`. Also this doesn't expose any use `os_pipe` to not leak its types into this API, making it depend on it. As in my previous attempt, @IanManske guided me here. # User-Facing Changes This change has no effect on `nushell` and therefore no user-facing changes. # Tests + Formatting This only exposes a new way of using already existing code and has therefore no further testing. The doctest succeeds on my machine at least (x86 Windows, 64 Bit). # After Submitting All the required documentation is already part of this PR. --- Cargo.lock | 1 + Cargo.toml | 2 +- crates/nu-protocol/Cargo.toml | 1 + crates/nu-protocol/src/engine/stack.rs | 52 ++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f55ed5ceb4..4b1e1fadaf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3259,6 +3259,7 @@ dependencies = [ "nu-test-support", "nu-utils", "num-format", + "os_pipe", "pretty_assertions", "rmp-serde", "rstest", diff --git a/Cargo.toml b/Cargo.toml index beabd26e20..78db3877ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,7 +119,7 @@ num-traits = "0.2" omnipath = "0.1" once_cell = "1.18" open = "5.1" -os_pipe = "1.1" +os_pipe = { version = "1.1", features = ["io_safety"] } pathdiff = "0.2" percent-encoding = "2" pretty_assertions = "1.4" diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index e2420a6e83..be45738447 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -47,6 +47,7 @@ nu-test-support = { path = "../nu-test-support", version = "0.93.1" } pretty_assertions = { workspace = true } rstest = { workspace = true } tempfile = { workspace = true } +os_pipe = { workspace = true } [package.metadata.docs.rs] all-features = true diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index b577d55270..65f2981fbf 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -7,6 +7,7 @@ use crate::{ }; use std::{ collections::{HashMap, HashSet}, + fs::File, sync::Arc, }; @@ -593,6 +594,57 @@ impl Stack { self } + /// Replaces the default stdout of the stack with a given file. + /// + /// This method configures the default stdout to redirect to a specified file. + /// It is primarily useful for applications using `nu` as a language, where the stdout of + /// external commands that are not explicitly piped can be redirected to a file. + /// + /// # Using Pipes + /// + /// For use in third-party applications pipes might be very useful as they allow using the + /// stdout of external commands for different uses. + /// For example the [`os_pipe`](https://docs.rs/os_pipe) crate provides a elegant way to to + /// access the stdout. 
+ /// + /// ``` + /// # use std::{fs::File, io::{self, Read}, thread, error}; + /// # use nu_protocol::engine::Stack; + /// # + /// let (mut reader, writer) = os_pipe::pipe().unwrap(); + /// // Use a thread to avoid blocking the execution of the called command. + /// let reader = thread::spawn(move || { + /// let mut buf: Vec = Vec::new(); + /// reader.read_to_end(&mut buf)?; + /// Ok::<_, io::Error>(buf) + /// }); + /// + /// #[cfg(windows)] + /// let file = std::os::windows::io::OwnedHandle::from(writer).into(); + /// #[cfg(unix)] + /// let file = std::os::unix::io::OwnedFd::from(writer).into(); + /// + /// let stack = Stack::new().stdout_file(file); + /// + /// // Execute some nu code. + /// + /// drop(stack); // drop the stack so that the writer will be dropped too + /// let buf = reader.join().unwrap().unwrap(); + /// // Do with your buffer whatever you want. + /// ``` + pub fn stdout_file(mut self, file: File) -> Self { + self.out_dest.stdout = OutDest::File(Arc::new(file)); + self + } + + /// Replaces the default stderr of the stack with a given file. + /// + /// For more info, see [`stdout_file`](Self::stdout_file). + pub fn stderr_file(mut self, file: File) -> Self { + self.out_dest.stderr = OutDest::File(Arc::new(file)); + self + } + /// Set the PWD environment variable to `path`. /// /// This method accepts `path` with trailing slashes, but they're removed From 98369985b1252fe18cd50c0f87d38c09d7fb474a Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Mon, 13 May 2024 16:17:31 -0700 Subject: [PATCH 024/137] Allow custom value operations to work on eager and lazy dataframes interchangeably. (#12819) Fixes Bug #12809 The example that @maxim-uvarov posted now works as expected: Screenshot 2024-05-09 at 16 21 01 --- .../values/nu_dataframe/operations.rs | 2 +- .../values/nu_lazyframe/custom_value.rs | 48 +++++++++++++++++-- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs index 42d803b0e4..db5409aff2 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/operations.rs @@ -27,7 +27,7 @@ impl NuDataFrame { let rhs_span = right.span(); match right { Value::Custom { .. 
} => { - let rhs = NuDataFrame::try_from_value(plugin, right)?; + let rhs = NuDataFrame::try_from_value_coerce(plugin, right, rhs_span)?; match (self.is_series(), rhs.is_series()) { (true, true) => { diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs index 731d210dd8..b3cd9500dd 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/custom_value.rs @@ -7,7 +7,7 @@ use uuid::Uuid; use crate::{ values::{CustomValueSupport, NuDataFrame, PolarsPluginCustomValue}, - PolarsPlugin, + Cacheable, PolarsPlugin, }; use super::NuLazyFrame; @@ -76,11 +76,49 @@ impl PolarsPluginCustomValue for NuLazyFrameCustomValue { _engine: &EngineInterface, other_value: Value, ) -> Result, ShellError> { - // to compare, we need to convert to NuDataframe - let df = NuLazyFrame::try_from_custom_value(plugin, self)?; - let df = df.collect(other_value.span())?; + let eager = NuLazyFrame::try_from_custom_value(plugin, self)?.collect(Span::unknown())?; let other = NuDataFrame::try_from_value_coerce(plugin, &other_value, other_value.span())?; - let res = df.is_equal(&other); + let res = eager.is_equal(&other); Ok(res) } + + fn custom_value_operation( + &self, + plugin: &PolarsPlugin, + engine: &EngineInterface, + lhs_span: Span, + operator: nu_protocol::Spanned, + right: Value, + ) -> Result { + let eager = NuLazyFrame::try_from_custom_value(plugin, self)?.collect(Span::unknown())?; + Ok(eager + .compute_with_value(plugin, lhs_span, operator.item, operator.span, &right)? + .cache(plugin, engine, lhs_span)? + .into_value(lhs_span)) + } + + fn custom_value_follow_path_int( + &self, + plugin: &PolarsPlugin, + _engine: &EngineInterface, + _self_span: Span, + index: nu_protocol::Spanned, + ) -> Result { + let eager = NuLazyFrame::try_from_custom_value(plugin, self)?.collect(Span::unknown())?; + eager.get_value(index.item, index.span) + } + + fn custom_value_follow_path_string( + &self, + plugin: &PolarsPlugin, + engine: &EngineInterface, + self_span: Span, + column_name: nu_protocol::Spanned, + ) -> Result { + let eager = NuLazyFrame::try_from_custom_value(plugin, self)?.collect(Span::unknown())?; + let column = eager.column(&column_name.item, self_span)?; + Ok(column + .cache(plugin, engine, self_span)? + .into_value(self_span)) + } } From cd381b74e035a6b3d403e670e6d782cdea49309f Mon Sep 17 00:00:00 2001 From: Maxime Jacob Date: Mon, 13 May 2024 21:22:39 -0400 Subject: [PATCH 025/137] Fix improperly escaped strings in stor insert (#12820) - fixes #12764 Replaced the custom logic with values_to_sql method that is already used in crate::database. This will ensure that handling of parameters is the same between sqlite and stor. 
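A minimal way to exercise the fix interactively, assuming the in-memory `stor` database and its usual `--table-name`/`--data-record` flags (the table and column names below are only illustrative):

```
# Sketch: a value containing a single quote, which previously broke the
# hand-built INSERT statement; with values_to_sql the value is bound as a
# parameter and stored verbatim.
stor create --table-name notes --columns {msg: str}
stor insert --table-name notes --data-record {msg: "it's stored as-is"}
stor open | query db "SELECT msg FROM notes"
```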
--- crates/nu-command/src/database/mod.rs | 2 +- crates/nu-command/src/database/values/mod.rs | 2 +- crates/nu-command/src/stor/insert.rs | 300 ++++++++++++++----- 3 files changed, 228 insertions(+), 76 deletions(-) diff --git a/crates/nu-command/src/database/mod.rs b/crates/nu-command/src/database/mod.rs index 4fddf6638e..22b92a81d3 100644 --- a/crates/nu-command/src/database/mod.rs +++ b/crates/nu-command/src/database/mod.rs @@ -5,7 +5,7 @@ use commands::add_commands_decls; pub use values::{ convert_sqlite_row_to_nu_value, convert_sqlite_value_to_nu_value, open_connection_in_memory, - open_connection_in_memory_custom, SQLiteDatabase, MEMORY_DB, + open_connection_in_memory_custom, values_to_sql, SQLiteDatabase, MEMORY_DB, }; use nu_protocol::engine::StateWorkingSet; diff --git a/crates/nu-command/src/database/values/mod.rs b/crates/nu-command/src/database/values/mod.rs index 4442ec0783..b9bd3d5d4c 100644 --- a/crates/nu-command/src/database/values/mod.rs +++ b/crates/nu-command/src/database/values/mod.rs @@ -3,5 +3,5 @@ pub mod sqlite; pub use sqlite::{ convert_sqlite_row_to_nu_value, convert_sqlite_value_to_nu_value, open_connection_in_memory, - open_connection_in_memory_custom, SQLiteDatabase, MEMORY_DB, + open_connection_in_memory_custom, values_to_sql, SQLiteDatabase, MEMORY_DB, }; diff --git a/crates/nu-command/src/stor/insert.rs b/crates/nu-command/src/stor/insert.rs index 2aeb076d44..e0c0ad4d28 100644 --- a/crates/nu-command/src/stor/insert.rs +++ b/crates/nu-command/src/stor/insert.rs @@ -1,5 +1,6 @@ -use crate::database::{SQLiteDatabase, MEMORY_DB}; +use crate::database::{values_to_sql, SQLiteDatabase, MEMORY_DB}; use nu_engine::command_prelude::*; +use rusqlite::params_from_iter; #[derive(Clone)] pub struct StorInsert; @@ -57,86 +58,81 @@ impl Command for StorInsert { // let config = engine_state.get_config(); let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); - if table_name.is_none() { - return Err(ShellError::MissingParameter { - param_name: "requires at table name".into(), - span, - }); - } - let new_table_name = table_name.unwrap_or("table".into()); - if let Ok(conn) = db.open_connection() { - match columns { - Some(record) => { - let mut create_stmt = format!("INSERT INTO {} ( ", new_table_name); - let cols = record.columns(); - cols.for_each(|col| { - create_stmt.push_str(&format!("{}, ", col)); - }); - if create_stmt.ends_with(", ") { - create_stmt.pop(); - create_stmt.pop(); - } + process(table_name, span, &db, columns)?; - create_stmt.push_str(") VALUES ( "); - let vals = record.values(); - vals.for_each(|val| match val { - Value::Int { val, .. } => { - create_stmt.push_str(&format!("{}, ", val)); - } - Value::Float { val, .. } => { - create_stmt.push_str(&format!("{}, ", val)); - } - Value::String { val, .. } => { - create_stmt.push_str(&format!("'{}', ", val)); - } - Value::Date { val, .. } => { - create_stmt.push_str(&format!("'{}', ", val)); - } - Value::Bool { val, .. 
} => { - create_stmt.push_str(&format!("{}, ", val)); - } - _ => { - // return Err(ShellError::UnsupportedInput { - // msg: format!("{} is not a valid datepart, expected one of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond", part.item), - // input: "value originates from here".to_string(), - // msg_span: span, - // input_span: val.span(), - // }); - } - }); - if create_stmt.ends_with(", ") { - create_stmt.pop(); - create_stmt.pop(); - } - - create_stmt.push(')'); - - // dbg!(&create_stmt); - - conn.execute(&create_stmt, []) - .map_err(|err| ShellError::GenericError { - error: "Failed to open SQLite connection in memory from insert".into(), - msg: err.to_string(), - span: Some(Span::test_data()), - help: None, - inner: vec![], - })?; - } - None => { - return Err(ShellError::MissingParameter { - param_name: "requires at least one column".into(), - span: call.head, - }); - } - }; - } - // dbg!(db.clone()); Ok(Value::custom(db, span).into_pipeline_data()) } } +fn process( + table_name: Option, + span: Span, + db: &SQLiteDatabase, + columns: Option, +) -> Result<(), ShellError> { + if table_name.is_none() { + return Err(ShellError::MissingParameter { + param_name: "requires at table name".into(), + span, + }); + } + let new_table_name = table_name.unwrap_or("table".into()); + if let Ok(conn) = db.open_connection() { + match columns { + Some(record) => { + let mut create_stmt = format!("INSERT INTO {} ( ", new_table_name); + let cols = record.columns(); + cols.for_each(|col| { + create_stmt.push_str(&format!("{}, ", col)); + }); + if create_stmt.ends_with(", ") { + create_stmt.pop(); + create_stmt.pop(); + } + + // Values are set as placeholders. + create_stmt.push_str(") VALUES ( "); + for (index, _) in record.columns().enumerate() { + create_stmt.push_str(&format!("?{}, ", index + 1)); + } + + if create_stmt.ends_with(", ") { + create_stmt.pop(); + create_stmt.pop(); + } + + create_stmt.push(')'); + + // dbg!(&create_stmt); + + // Get the params from the passed values + let params = values_to_sql(record.values().cloned())?; + + conn.execute(&create_stmt, params_from_iter(params)) + .map_err(|err| ShellError::GenericError { + error: "Failed to open SQLite connection in memory from insert".into(), + msg: err.to_string(), + span: Some(Span::test_data()), + help: None, + inner: vec![], + })?; + } + None => { + return Err(ShellError::MissingParameter { + param_name: "requires at least one column".into(), + span, + }); + } + }; + } + // dbg!(db.clone()); + Ok(()) +} + #[cfg(test)] mod test { + use chrono::DateTime; + use super::*; #[test] @@ -145,4 +141,160 @@ mod test { test_examples(StorInsert {}) } + + #[test] + fn test_process_with_simple_parameters() { + let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); + let create_stmt = "CREATE TABLE test_process_with_simple_parameters ( + int_column INTEGER, + real_column REAL, + str_column VARCHAR(255), + bool_column BOOLEAN, + date_column DATETIME DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')) + )"; + + let conn = db + .open_connection() + .expect("Test was unable to open connection."); + conn.execute(create_stmt, []) + .expect("Failed to create table as part of test."); + let table_name = Some("test_process_with_simple_parameters".to_string()); + let span = Span::unknown(); + let mut columns = Record::new(); + columns.insert("int_column".to_string(), Value::test_int(42)); + columns.insert("real_column".to_string(), Value::test_float(3.1)); + columns.insert( + "str_column".to_string(), + 
Value::test_string("SimpleString".to_string()), + ); + columns.insert("bool_column".to_string(), Value::test_bool(true)); + columns.insert( + "date_column".to_string(), + Value::test_date( + DateTime::parse_from_str("2021-12-30 00:00:00 +0000", "%Y-%m-%d %H:%M:%S %z") + .expect("Date string should parse."), + ), + ); + + let result = process(table_name, span, &db, Some(columns)); + + assert!(result.is_ok()); + } + + #[test] + fn test_process_string_with_space() { + let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); + let create_stmt = "CREATE TABLE test_process_string_with_space ( + str_column VARCHAR(255) + )"; + + let conn = db + .open_connection() + .expect("Test was unable to open connection."); + conn.execute(create_stmt, []) + .expect("Failed to create table as part of test."); + let table_name = Some("test_process_string_with_space".to_string()); + let span = Span::unknown(); + let mut columns = Record::new(); + columns.insert( + "str_column".to_string(), + Value::test_string("String With Spaces".to_string()), + ); + + let result = process(table_name, span, &db, Some(columns)); + + assert!(result.is_ok()); + } + + #[test] + fn test_no_errors_when_string_too_long() { + let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); + let create_stmt = "CREATE TABLE test_errors_when_string_too_long ( + str_column VARCHAR(8) + )"; + + let conn = db + .open_connection() + .expect("Test was unable to open connection."); + conn.execute(create_stmt, []) + .expect("Failed to create table as part of test."); + let table_name = Some("test_errors_when_string_too_long".to_string()); + let span = Span::unknown(); + let mut columns = Record::new(); + columns.insert( + "str_column".to_string(), + Value::test_string("ThisIsALongString".to_string()), + ); + + let result = process(table_name, span, &db, Some(columns)); + // SQLite uses dynamic typing, making any length acceptable for a varchar column + assert!(result.is_ok()); + } + + #[test] + fn test_no_errors_when_param_is_wrong_type() { + let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); + let create_stmt = "CREATE TABLE test_errors_when_param_is_wrong_type ( + int_column INT + )"; + + let conn = db + .open_connection() + .expect("Test was unable to open connection."); + conn.execute(create_stmt, []) + .expect("Failed to create table as part of test."); + let table_name = Some("test_errors_when_param_is_wrong_type".to_string()); + let span = Span::unknown(); + let mut columns = Record::new(); + columns.insert( + "int_column".to_string(), + Value::test_string("ThisIsTheWrongType".to_string()), + ); + + let result = process(table_name, span, &db, Some(columns)); + // SQLite uses dynamic typing, making any type acceptable for a column + assert!(result.is_ok()); + } + + #[test] + fn test_errors_when_column_doesnt_exist() { + let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); + let create_stmt = "CREATE TABLE test_errors_when_column_doesnt_exist ( + int_column INT + )"; + + let conn = db + .open_connection() + .expect("Test was unable to open connection."); + conn.execute(create_stmt, []) + .expect("Failed to create table as part of test."); + let table_name = Some("test_errors_when_column_doesnt_exist".to_string()); + let span = Span::unknown(); + let mut columns = Record::new(); + columns.insert( + "not_a_column".to_string(), + Value::test_string("ThisIsALongString".to_string()), + ); + + let result = process(table_name, span, &db, Some(columns)); + + 
assert!(result.is_err()); + } + + #[test] + fn test_errors_when_table_doesnt_exist() { + let db = Box::new(SQLiteDatabase::new(std::path::Path::new(MEMORY_DB), None)); + + let table_name = Some("test_errors_when_table_doesnt_exist".to_string()); + let span = Span::unknown(); + let mut columns = Record::new(); + columns.insert( + "str_column".to_string(), + Value::test_string("ThisIsALongString".to_string()), + ); + + let result = process(table_name, span, &db, Some(columns)); + + assert!(result.is_err()); + } } From 2ed77aef1dc9241c169c9e441aeb53f5a01932a0 Mon Sep 17 00:00:00 2001 From: Maxime Jacob Date: Tue, 14 May 2024 10:13:49 -0400 Subject: [PATCH 026/137] Fix panic when exploring empty dictionary (#12860) - fixes #12841 # Description Add boundary checks to ensure that the row and column chosen in RecordView are not over the length of the possible row and columns. If we are out of bounds, we default to Value::nothing. # Tests + Formatting Tests ran and formatting done --- crates/nu-explore/src/views/record/mod.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/nu-explore/src/views/record/mod.rs b/crates/nu-explore/src/views/record/mod.rs index 6534ba3589..0ed8e3e761 100644 --- a/crates/nu-explore/src/views/record/mod.rs +++ b/crates/nu-explore/src/views/record/mod.rs @@ -20,7 +20,7 @@ use crossterm::event::{KeyCode, KeyEvent, KeyModifiers}; use nu_color_config::{get_color_map, StyleComputer}; use nu_protocol::{ engine::{EngineState, Stack}, - Record, Value, + Record, Span, Value, }; use ratatui::{layout::Rect, widgets::Block}; use std::{borrow::Cow, collections::HashMap}; @@ -180,7 +180,11 @@ impl<'a> RecordView<'a> { Orientation::Left => (column, row), }; - layer.records[row][column].clone() + if layer.records.len() > row && layer.records[row].len() > column { + layer.records[row][column].clone() + } else { + Value::nothing(Span::unknown()) + } } fn create_tablew(&'a self, cfg: ViewConfig<'a>) -> TableW<'a> { From aa46bc97b354d28c14c5ad2c51c9eb7f94276bec Mon Sep 17 00:00:00 2001 From: NotTheDr01ds <32344964+NotTheDr01ds@users.noreply.github.com> Date: Tue, 14 May 2024 10:21:50 -0400 Subject: [PATCH 027/137] Search terms for compact command (#12864) # Description There was a question in Discord today about how to remove empty rows from a table. The user found the `compact` command on their own, but I realized that there were no search terms on the command. I've added 'empty' and 'remove', although I subsequently figured out that 'empty' is found in the "usage" anyway. That said, I don't think it hurts to have good search terms behind it regardless. # User-Facing Changes Just the help # Tests + Formatting - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` # After Submitting --- crates/nu-command/src/filters/compact.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/nu-command/src/filters/compact.rs b/crates/nu-command/src/filters/compact.rs index d8872448be..9a4e968e9b 100644 --- a/crates/nu-command/src/filters/compact.rs +++ b/crates/nu-command/src/filters/compact.rs @@ -31,6 +31,10 @@ impl Command for Compact { "Creates a table with non-empty rows." 
} + fn search_terms(&self) -> Vec<&str> { + vec!["empty", "remove"] + } + fn run( &self, engine_state: &EngineState, From c3da44cbb727348411d3520bf2b186860496f8da Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Tue, 14 May 2024 21:10:06 +0000 Subject: [PATCH 028/137] Fix `char` panic (#12867) # Description The `char` command can panic due to a failed `expect`: `char --integer ...[77 78 79]` This PR fixes the panic for the `--integer` flag and also the `--unicode` flag. # After Submitting Check other commands and places where similar bugs can occur due to usages of `Call::positional_nth` and related methods. --- crates/nu-command/src/strings/char_.rs | 90 ++++++++++++-------------- 1 file changed, 41 insertions(+), 49 deletions(-) diff --git a/crates/nu-command/src/strings/char_.rs b/crates/nu-command/src/strings/char_.rs index e227f2667f..7d8d152b56 100644 --- a/crates/nu-command/src/strings/char_.rs +++ b/crates/nu-command/src/strings/char_.rs @@ -235,18 +235,18 @@ impl Command for Char { // handle -i flag if integer { - let int_args: Vec = call.rest_const(working_set, 0)?; - handle_integer_flag(int_args, call, call_span) + let int_args = call.rest_const(working_set, 0)?; + handle_integer_flag(int_args, call_span) } // handle -u flag else if unicode { - let string_args: Vec = call.rest_const(working_set, 0)?; - handle_unicode_flag(string_args, call, call_span) + let string_args = call.rest_const(working_set, 0)?; + handle_unicode_flag(string_args, call_span) } // handle the rest else { - let string_args: Vec = call.rest_const(working_set, 0)?; - handle_the_rest(string_args, call, call_span) + let string_args = call.rest_const(working_set, 0)?; + handle_the_rest(string_args, call_span) } } @@ -270,18 +270,18 @@ impl Command for Char { // handle -i flag if integer { - let int_args: Vec = call.rest(engine_state, stack, 0)?; - handle_integer_flag(int_args, call, call_span) + let int_args = call.rest(engine_state, stack, 0)?; + handle_integer_flag(int_args, call_span) } // handle -u flag else if unicode { - let string_args: Vec = call.rest(engine_state, stack, 0)?; - handle_unicode_flag(string_args, call, call_span) + let string_args = call.rest(engine_state, stack, 0)?; + handle_unicode_flag(string_args, call_span) } // handle the rest else { - let string_args: Vec = call.rest(engine_state, stack, 0)?; - handle_the_rest(string_args, call, call_span) + let string_args = call.rest(engine_state, stack, 0)?; + handle_the_rest(string_args, call_span) } } } @@ -309,8 +309,7 @@ fn generate_character_list(ctrlc: Option>, call_span: Span) -> P } fn handle_integer_flag( - int_args: Vec, - call: &Call, + int_args: Vec>, call_span: Span, ) -> Result { if int_args.is_empty() { @@ -319,20 +318,17 @@ fn handle_integer_flag( span: call_span, }); } - let mut multi_byte = String::new(); - for (i, &arg) in int_args.iter().enumerate() { - let span = call - .positional_nth(i) - .expect("Unexpected missing argument") - .span; - multi_byte.push(integer_to_unicode_char(arg, span)?) 
- } - Ok(Value::string(multi_byte, call_span).into_pipeline_data()) + + let str = int_args + .into_iter() + .map(integer_to_unicode_char) + .collect::>()?; + + Ok(Value::string(str, call_span).into_pipeline_data()) } fn handle_unicode_flag( - string_args: Vec, - call: &Call, + string_args: Vec>, call_span: Span, ) -> Result { if string_args.is_empty() { @@ -341,57 +337,53 @@ fn handle_unicode_flag( span: call_span, }); } - let mut multi_byte = String::new(); - for (i, arg) in string_args.iter().enumerate() { - let span = call - .positional_nth(i) - .expect("Unexpected missing argument") - .span; - multi_byte.push(string_to_unicode_char(arg, span)?) - } - Ok(Value::string(multi_byte, call_span).into_pipeline_data()) + + let str = string_args + .into_iter() + .map(string_to_unicode_char) + .collect::>()?; + + Ok(Value::string(str, call_span).into_pipeline_data()) } fn handle_the_rest( - string_args: Vec, - call: &Call, + string_args: Vec>, call_span: Span, ) -> Result { - if string_args.is_empty() { + let Some(s) = string_args.first() else { return Err(ShellError::MissingParameter { param_name: "missing name of the character".into(), span: call_span, }); - } - let special_character = str_to_character(&string_args[0]); + }; + + let special_character = str_to_character(&s.item); + if let Some(output) = special_character { Ok(Value::string(output, call_span).into_pipeline_data()) } else { Err(ShellError::TypeMismatch { err_message: "error finding named character".into(), - span: call - .positional_nth(0) - .expect("Unexpected missing argument") - .span, + span: s.span, }) } } -fn integer_to_unicode_char(value: i64, t: Span) -> Result { - let decoded_char = value.try_into().ok().and_then(std::char::from_u32); +fn integer_to_unicode_char(value: Spanned) -> Result { + let decoded_char = value.item.try_into().ok().and_then(std::char::from_u32); if let Some(ch) = decoded_char { Ok(ch) } else { Err(ShellError::TypeMismatch { err_message: "not a valid Unicode codepoint".into(), - span: t, + span: value.span, }) } } -fn string_to_unicode_char(s: &str, t: Span) -> Result { - let decoded_char = u32::from_str_radix(s, 16) +fn string_to_unicode_char(s: Spanned) -> Result { + let decoded_char = u32::from_str_radix(&s.item, 16) .ok() .and_then(std::char::from_u32); @@ -400,7 +392,7 @@ fn string_to_unicode_char(s: &str, t: Span) -> Result { } else { Err(ShellError::TypeMismatch { err_message: "error decoding Unicode character".into(), - span: t, + span: s.span, }) } } From cb64c78a3b7a7b56a70a884096472531ccdf4c02 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 May 2024 09:05:55 +0800 Subject: [PATCH 029/137] Bump interprocess from 2.0.1 to 2.1.0 (#12869) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [interprocess](https://github.com/kotauskas/interprocess) from 2.0.1 to 2.1.0.
Release notes

Sourced from interprocess's releases.

2.1.0 – listeners are now iterators
  • Fixes #49
  • Adds Iterator impl on local socket listeners (closes #64)
  • Miscellaneous documentation fixes
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=interprocess&package-manager=cargo&previous-version=2.0.1&new-version=2.1.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4b1e1fadaf..e3d29fef42 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2034,9 +2034,9 @@ dependencies = [ [[package]] name = "interprocess" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c7fb8583fab9503654385e2bafda123376445a77027a1b106dd7e44cf51122f" +checksum = "7b4d0250d41da118226e55b3d50ca3f0d9e0a0f6829b92f543ac0054aeea1572" dependencies = [ "libc", "recvmsg", diff --git a/Cargo.toml b/Cargo.toml index 78db3877ea..6b035ee0f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,7 +94,7 @@ heck = "0.5.0" human-date-parser = "0.1.1" indexmap = "2.2" indicatif = "0.17" -interprocess = "2.0.1" +interprocess = "2.1.0" is_executable = "1.0" itertools = "0.12" libc = "0.2" From 9bf4d3ece6484d6aeb23422447ec6b11b6fb7a7e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 15 May 2024 09:06:09 +0800 Subject: [PATCH 030/137] Bump rust-embed from 8.3.0 to 8.4.0 (#12870) Bumps [rust-embed](https://github.com/pyros2097/rust-embed) from 8.3.0 to 8.4.0.
Changelog

Sourced from rust-embed's changelog.

[8.4.0] - 2024-05-11
  • Re-export RustEmbed as Embed #245. Thanks to pyrossh
  • Do not build glob matchers repeatedly when include-exclude feature is enabled #244. Thanks to osiewicz
  • Add metadata_only attribute #241. Thanks to ddfisher
  • Replace expect with a safer alternative that returns None instead #240. Thanks to costinsin
  • Eliminate unnecessary to_path call #239. Thanks to smoelius
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rust-embed&package-manager=cargo&previous-version=8.3.0&new-version=8.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e3d29fef42..656f859a63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5057,9 +5057,9 @@ dependencies = [ [[package]] name = "rust-embed" -version = "8.3.0" +version = "8.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb78f46d0066053d16d4ca7b898e9343bc3530f71c61d5ad84cd404ada068745" +checksum = "19549741604902eb99a7ed0ee177a0663ee1eda51a29f71401f166e47e77806a" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -5068,9 +5068,9 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.3.0" +version = "8.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91ac2a3c6c0520a3fb3dd89321177c3c692937c4eb21893378219da10c44fc8" +checksum = "cb9f96e283ec64401f30d3df8ee2aaeb2561f34c824381efa24a35f79bf40ee4" dependencies = [ "proc-macro2", "quote", @@ -5081,9 +5081,9 @@ dependencies = [ [[package]] name = "rust-embed-utils" -version = "8.3.0" +version = "8.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f69089032567ffff4eada41c573fc43ff466c7db7c5688b2e7969584345581" +checksum = "38c74a686185620830701348de757fd36bef4aa9680fd23c49fc539ddcc1af32" dependencies = [ "sha2", "walkdir", diff --git a/Cargo.toml b/Cargo.toml index 6b035ee0f8..1366ecbdd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -140,7 +140,7 @@ ropey = "1.6.1" roxmltree = "0.19" rstest = { version = "0.18", default-features = false } rusqlite = "0.31" -rust-embed = "8.3.0" +rust-embed = "8.4.0" same-file = "1.0" serde = { version = "1.0", default-features = false } serde_json = "1.0" From 155934f783e15c6ffb3a42119105799a625f7bbc Mon Sep 17 00:00:00 2001 From: Wind Date: Wed, 15 May 2024 09:14:11 +0800 Subject: [PATCH 031/137] make better messages for incomplete string (#12868) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Fixes: #12795 The issue is caused by an empty position of `ParseError::UnexpectedEof`. So no detailed message is displayed. To fix the issue, I adjust the start of span to `span.end - 1`. In this way, we can make sure that it never points to an empty position. After lexing item, I also reorder the unclosed character checking . Now it will be checking unclosed opening delimiters first. # User-Facing Changes After this pr, it outputs detailed error message for incomplete string when running scripts. ## Before ``` ❯ nu -c "'ab" Error: nu::parser::unexpected_eof × Unexpected end of code. ╭─[source:1:4] 1 │ 'ab ╰──── > ./target/debug/nu -c "r#'ab" Error: nu::parser::unexpected_eof × Unexpected end of code. ╭─[source:1:6] 1 │ r#'ab ╰──── ``` ## After ``` > nu -c "'ab" Error: nu::parser::unexpected_eof × Unexpected end of code. ╭─[source:1:3] 1 │ 'ab · ┬ · ╰── expected closing ' ╰──── > ./target/debug/nu -c "r#'ab" Error: nu::parser::unexpected_eof × Unexpected end of code. ╭─[source:1:5] 1 │ r#'ab · ┬ · ╰── expected closing '# ╰──── ``` # Tests + Formatting Added some tests for incomplete string. 
--------- Co-authored-by: Ian Manske --- crates/nu-parser/src/lex.rs | 42 ++++++++++++++++++++----------------- tests/repl/test_strings.rs | 16 ++++++++++++-- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index f9e75d13f0..4639cae25d 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -129,7 +129,7 @@ pub fn lex_item( }, Some(ParseError::UnexpectedEof( (start as char).to_string(), - Span::new(span.end, span.end), + Span::new(span.end - 1, span.end), )), ); } @@ -247,21 +247,6 @@ pub fn lex_item( let span = Span::new(span_offset + token_start, span_offset + *curr_offset); - // If there is still unclosed opening delimiters, remember they were missing - if let Some(block) = block_level.last() { - let delim = block.closing(); - let cause = - ParseError::UnexpectedEof((delim as char).to_string(), Span::new(span.end, span.end)); - - return ( - Token { - contents: TokenContents::Item, - span, - }, - Some(cause), - ); - } - if let Some(delim) = quote_start { // The non-lite parse trims quotes on both sides, so we add the expected quote so that // anyone wanting to consume this partial parse (e.g., completions) will be able to get @@ -273,11 +258,28 @@ pub fn lex_item( }, Some(ParseError::UnexpectedEof( (delim as char).to_string(), - Span::new(span.end, span.end), + Span::new(span.end - 1, span.end), )), ); } + // If there is still unclosed opening delimiters, remember they were missing + if let Some(block) = block_level.last() { + let delim = block.closing(); + let cause = ParseError::UnexpectedEof( + (delim as char).to_string(), + Span::new(span.end - 1, span.end), + ); + + return ( + Token { + contents: TokenContents::Item, + span, + }, + Some(cause), + ); + } + // If we didn't accumulate any characters, it's an unexpected error. if *curr_offset - token_start == 0 { return ( @@ -395,9 +397,11 @@ fn lex_raw_string( *curr_offset += 1 } if !matches { + let mut expected = '\''.to_string(); + expected.push_str(&"#".repeat(prefix_sharp_cnt)); return Err(ParseError::UnexpectedEof( - "#".to_string(), - Span::new(span_offset + *curr_offset, span_offset + *curr_offset), + expected, + Span::new(span_offset + *curr_offset - 1, span_offset + *curr_offset), )); } Ok(()) diff --git a/tests/repl/test_strings.rs b/tests/repl/test_strings.rs index cafd6cc681..fa6c419e23 100644 --- a/tests/repl/test_strings.rs +++ b/tests/repl/test_strings.rs @@ -154,6 +154,18 @@ fn raw_string_inside_closure() -> TestResult { } #[test] -fn incomplete_raw_string() -> TestResult { - fail_test("r#abc", "expected '") +fn incomplete_string() -> TestResult { + fail_test("r#abc", "expected '")?; + fail_test("r#'bc", "expected closing '#")?; + fail_test("'ab\"", "expected closing '")?; + fail_test("\"ab'", "expected closing \"")?; + fail_test( + r#"def func [] { + { + "A": ""B" # <- the quote is bad + } +} +"#, + "expected closing \"", + ) } From a7807735b15ad377f281091a85afb3791d9df608 Mon Sep 17 00:00:00 2001 From: Andy Gayton Date: Tue, 14 May 2024 21:48:27 -0400 Subject: [PATCH 032/137] Add a passing test for interactivity on slow pipelines (#12865) # Description This PR adds a single test to assert interactivity on slow pipelines Currently the timeout is set to 6 seconds, as the test can sometimes take ~3secs to run on my local m1 mac air, which I don't think is an indication of a slow pipeline, but rather slow test start up time... 
--- tests/plugins/stream.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/plugins/stream.rs b/tests/plugins/stream.rs index ee62703017..8530e5bc32 100644 --- a/tests/plugins/stream.rs +++ b/tests/plugins/stream.rs @@ -1,3 +1,5 @@ +use rstest::rstest; + use nu_test_support::nu_with_plugins; use pretty_assertions::assert_eq; @@ -190,3 +192,18 @@ fn generate_sequence() { assert_eq!(actual.out, "[0,2,4,6,8,10]"); } + +#[rstest] +#[timeout(std::time::Duration::from_secs(6))] +fn echo_interactivity_on_slow_pipelines() { + // This test works by putting 0 on the upstream immediately, followed by 1 after 10 seconds. + // If values aren't streamed to the plugin as they become available, `example echo` won't emit + // anything until both 0 and 1 are available. The desired behavior is that `example echo` gets + // the 0 immediately, which is consumed by `first`, allowing the pipeline to terminate early. + let actual = nu_with_plugins!( + cwd: "tests/fixtures/formats", + plugin: ("nu_plugin_example"), + r#"[1] | each { |n| sleep 10sec; $n } | prepend 0 | example echo | first"# + ); + assert_eq!(actual.out, "0"); +} From 0cfbdc909e7e04138881eee714eaeea762e4a5c5 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Wed, 15 May 2024 07:40:04 +0000 Subject: [PATCH 033/137] Fix `sys` panic (#12846) # Description This should fix #10155 where the `sys` command can panic due to date math in certain cases / on certain systems. # User-Facing Changes The `boot_time` column now has a date value instead of a formatted date string. This is technically a breaking change. --- crates/nu-command/src/system/sys/mod.rs | 34 +++++++++++++++---------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/crates/nu-command/src/system/sys/mod.rs b/crates/nu-command/src/system/sys/mod.rs index 5e0467c251..fcb40fd37d 100644 --- a/crates/nu-command/src/system/sys/mod.rs +++ b/crates/nu-command/src/system/sys/mod.rs @@ -16,9 +16,8 @@ pub use sys_::Sys; pub use temp::SysTemp; pub use users::SysUsers; -use chrono::{DateTime, Local}; +use chrono::{DateTime, FixedOffset, Local}; use nu_protocol::{record, Record, Span, Value}; -use std::time::{Duration, UNIX_EPOCH}; use sysinfo::{ Components, CpuRefreshKind, Disks, Networks, System, Users, MINIMUM_CPU_UPDATE_INTERVAL, }; @@ -171,22 +170,31 @@ pub fn host(span: Span) -> Record { record.push("hostname", Value::string(trim_cstyle_null(hostname), span)); } - record.push( - "uptime", - Value::duration(1000000000 * System::uptime() as i64, span), - ); + let uptime = System::uptime() + .saturating_mul(1_000_000_000) + .try_into() + .unwrap_or(i64::MAX); - // Creates a new SystemTime from the specified number of whole seconds - let d = UNIX_EPOCH + Duration::from_secs(System::boot_time()); - // Create DateTime from SystemTime - let datetime = DateTime::::from(d); - // Convert to local time and then rfc3339 - let timestamp_str = datetime.with_timezone(datetime.offset()).to_rfc3339(); - record.push("boot_time", Value::string(timestamp_str, span)); + record.push("uptime", Value::duration(uptime, span)); + + let boot_time = boot_time() + .map(|time| Value::date(time, span)) + .unwrap_or(Value::nothing(span)); + + record.push("boot_time", boot_time); record } +fn boot_time() -> Option> { + // Broken systems can apparently return really high values. + // See: https://github.com/nushell/nushell/issues/10155 + // First, try to convert u64 to i64, and then try to create a `DateTime`. 
+ let secs = System::boot_time().try_into().ok()?; + let time = DateTime::from_timestamp(secs, 0)?; + Some(time.with_timezone(&Local).fixed_offset()) +} + pub fn temp(span: Span) -> Value { let components = Components::new_with_refreshed_list() .iter() From defed3001da941e76f3df059544f689fe85c6cc0 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Wed, 15 May 2024 09:44:09 -0500 Subject: [PATCH 034/137] make it clearer what is being loaded with --log-level info (#12875) # Description A common question we get is what config files are loaded when and with what parameters. It's for this reason that I wrote [this gist](https://gist.github.com/fdncred/b87b784f04984dc31a150baed9ad2447). Another way to figure this out is to use `nu --log-level info`. This will show some performance timings but will also show what is being loaded when. For the most part the `[INFO]` lines show the performance timings and the `[WARN]` lines show the files. This PR tries to make things a little bit clearer when using the `--log-level info` parameter. # User-Facing Changes # Tests + Formatting # After Submitting --- src/config_files.rs | 51 ++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/src/config_files.rs b/src/config_files.rs index ba8a3436a0..ec4511860f 100644 --- a/src/config_files.rs +++ b/src/config_files.rs @@ -1,4 +1,4 @@ -use log::{info, trace}; +use log::warn; #[cfg(feature = "plugin")] use nu_cli::read_plugin_file; use nu_cli::{eval_config_contents, eval_source}; @@ -27,7 +27,10 @@ pub(crate) fn read_config_file( config_file: Option>, is_env_config: bool, ) { - trace!("read_config_file {:?}", &config_file); + warn!( + "read_config_file() config_file_specified: {:?}, is_env_config: {is_env_config}", + &config_file + ); // Load config startup file if let Some(file) = config_file { let working_set = StateWorkingSet::new(engine_state); @@ -122,21 +125,27 @@ pub(crate) fn read_config_file( } pub(crate) fn read_loginshell_file(engine_state: &mut EngineState, stack: &mut Stack) { + warn!( + "read_loginshell_file() {}:{}:{}", + file!(), + line!(), + column!() + ); + // read and execute loginshell file if exists if let Some(mut config_path) = nu_path::config_dir() { config_path.push(NUSHELL_FOLDER); config_path.push(LOGINSHELL_FILE); + warn!("loginshell_file: {}", config_path.display()); + if config_path.exists() { eval_config_contents(config_path, engine_state, stack); } } - - info!("read_loginshell_file {}:{}:{}", file!(), line!(), column!()); } pub(crate) fn read_default_env_file(engine_state: &mut EngineState, stack: &mut Stack) { - trace!("read_default_env_file"); let config_file = get_default_env(); eval_source( engine_state, @@ -147,7 +156,13 @@ pub(crate) fn read_default_env_file(engine_state: &mut EngineState, stack: &mut false, ); - info!("read_config_file {}:{}:{}", file!(), line!(), column!()); + warn!( + "read_default_env_file() env_file_contents: {config_file} {}:{}:{}", + file!(), + line!(), + column!() + ); + // Merge the environment in case env vars changed in the config match engine_state.cwd(Some(stack)) { Ok(cwd) => { @@ -167,10 +182,9 @@ fn eval_default_config( config_file: &str, is_env_config: bool, ) { - trace!( - "eval_default_config: config_file: {:?}, is_env_config: {}", - &config_file, - is_env_config + warn!( + "eval_default_config() config_file_specified: {:?}, is_env_config: {}", + &config_file, is_env_config ); println!("Continuing without config file"); // Just use the contents 
of "default_config.nu" or "default_env.nu" @@ -208,11 +222,9 @@ pub(crate) fn setup_config( env_file: Option>, is_login_shell: bool, ) { - trace!( - "setup_config: config: {:?}, env: {:?}, login: {}", - &config_file, - &env_file, - is_login_shell + warn!( + "setup_config() config_file_specified: {:?}, env_file_specified: {:?}, login: {}", + &config_file, &env_file, is_login_shell ); let result = catch_unwind(AssertUnwindSafe(|| { #[cfg(feature = "plugin")] @@ -240,12 +252,9 @@ pub(crate) fn set_config_path( key: &str, config_file: Option<&Spanned>, ) { - trace!( - "set_config_path: cwd: {:?}, default_config: {}, key: {}, config_file: {:?}", - &cwd, - &default_config_name, - &key, - &config_file + warn!( + "set_config_path() cwd: {:?}, default_config: {}, key: {}, config_file_specified: {:?}", + &cwd, &default_config_name, &key, &config_file ); let config_path = match config_file { Some(s) => canonicalize_with(&s.item, cwd).ok(), From 72b880662b75d5a418369dec98288351769a3990 Mon Sep 17 00:00:00 2001 From: NotTheDr01ds <32344964+NotTheDr01ds@users.noreply.github.com> Date: Wed, 15 May 2024 12:16:59 -0400 Subject: [PATCH 035/137] Fixed a nitpick usage-help error - closure v. block (#12876) # Description So minor, but had to be fixed sometime. `help each while` used the term "block" in the "usage", but the argument type is a closure. # User-Facing Changes help-only --- crates/nu-cmd-extra/src/extra/filters/each_while.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/nu-cmd-extra/src/extra/filters/each_while.rs b/crates/nu-cmd-extra/src/extra/filters/each_while.rs index 2dda815d43..939f194f43 100644 --- a/crates/nu-cmd-extra/src/extra/filters/each_while.rs +++ b/crates/nu-cmd-extra/src/extra/filters/each_while.rs @@ -10,7 +10,7 @@ impl Command for EachWhile { } fn usage(&self) -> &str { - "Run a block on each row of the input list until a null is found, then create a new list with the results." + "Run a closure on each row of the input list until a null is found, then create a new list with the results." } fn search_terms(&self) -> Vec<&str> { From b08135d877d94279d4250d0ae5d0ee2887530c53 Mon Sep 17 00:00:00 2001 From: NotTheDr01ds <32344964+NotTheDr01ds@users.noreply.github.com> Date: Wed, 15 May 2024 13:49:08 -0400 Subject: [PATCH 036/137] Fixed small error in the help-examples for the get command (#12877) # Description Another small error in Help, this time for the `get` command example. # User-Facing Changes Help only --- crates/nu-command/src/filters/get.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/nu-command/src/filters/get.rs b/crates/nu-command/src/filters/get.rs index c30ad1f3a4..a481372db1 100644 --- a/crates/nu-command/src/filters/get.rs +++ b/crates/nu-command/src/filters/get.rs @@ -115,7 +115,7 @@ If multiple cell paths are given, this will produce a list of values."# }, Example { description: - "Extract the name of the 3rd record in a list (same as `ls | $in.name`)", + "Extract the name of the 3rd record in a list (same as `ls | $in.name.2`)", example: "ls | get name.2", result: None, }, From 06fe7d1e16c405a72ad08b2dc68b83fd947ddeba Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Wed, 15 May 2024 17:59:42 +0000 Subject: [PATCH 037/137] Remove usages of `Call::positional_nth` (#12871) # Description Following from #12867, this PR replaces usages of `Call::positional_nth` with existing spans. This removes several `expect`s from the code. 
Also remove unused `positional_nth_mut` and `positional_iter_mut` --- crates/nu-command/src/env/with_env.rs | 10 ++-------- crates/nu-command/src/filesystem/touch.rs | 17 ++++------------- crates/nu-command/src/platform/ansi/ansi_.rs | 5 +---- crates/nu-protocol/src/ast/call.rs | 20 -------------------- 4 files changed, 7 insertions(+), 45 deletions(-) diff --git a/crates/nu-command/src/env/with_env.rs b/crates/nu-command/src/env/with_env.rs index 7eaf64cb54..ff66d21f49 100644 --- a/crates/nu-command/src/env/with_env.rs +++ b/crates/nu-command/src/env/with_env.rs @@ -90,10 +90,7 @@ fn with_env( return Err(ShellError::CantConvert { to_type: "record".into(), from_type: x.get_type().to_string(), - span: call - .positional_nth(1) - .expect("already checked through .req") - .span, + span: x.span(), help: None, }); } @@ -124,10 +121,7 @@ fn with_env( return Err(ShellError::CantConvert { to_type: "record".into(), from_type: x.get_type().to_string(), - span: call - .positional_nth(1) - .expect("already checked through .req") - .span, + span: x.span(), help: None, }); } diff --git a/crates/nu-command/src/filesystem/touch.rs b/crates/nu-command/src/filesystem/touch.rs index ce8ed0dad3..08843b11f0 100644 --- a/crates/nu-command/src/filesystem/touch.rs +++ b/crates/nu-command/src/filesystem/touch.rs @@ -117,7 +117,7 @@ impl Command for Touch { #[allow(deprecated)] let cwd = current_dir(engine_state, stack)?; - for (index, glob) in files.into_iter().enumerate() { + for glob in files { let path = expand_path_with(glob.item.as_ref(), &cwd, glob.item.is_expand()); // If --no-create is passed and the file/dir does not exist there's nothing to do @@ -135,10 +135,7 @@ impl Command for Touch { { return Err(ShellError::CreateNotPossible { msg: format!("Failed to create file: {err}"), - span: call - .positional_nth(index) - .expect("already checked positional") - .span, + span: glob.span, }); }; } @@ -148,10 +145,7 @@ impl Command for Touch { { return Err(ShellError::ChangeModifiedTimeNotPossible { msg: format!("Failed to change the modified time: {err}"), - span: call - .positional_nth(index) - .expect("already checked positional") - .span, + span: glob.span, }); }; } @@ -161,10 +155,7 @@ impl Command for Touch { { return Err(ShellError::ChangeAccessTimeNotPossible { msg: format!("Failed to change the access time: {err}"), - span: call - .positional_nth(index) - .expect("already checked positional") - .span, + span: glob.span, }); }; } diff --git a/crates/nu-command/src/platform/ansi/ansi_.rs b/crates/nu-command/src/platform/ansi/ansi_.rs index 427c7c3a73..9e0b0bba66 100644 --- a/crates/nu-command/src/platform/ansi/ansi_.rs +++ b/crates/nu-command/src/platform/ansi/ansi_.rs @@ -784,10 +784,7 @@ fn heavy_lifting(code: Value, escape: bool, osc: bool, call: &Call) -> Result { return Err(ShellError::TypeMismatch { err_message: String::from("Unknown ansi code"), - span: call - .positional_nth(0) - .expect("Unexpected missing argument") - .span, + span: code.span(), }) } } diff --git a/crates/nu-protocol/src/ast/call.rs b/crates/nu-protocol/src/ast/call.rs index 6689bc3a55..9aaf17d5db 100644 --- a/crates/nu-protocol/src/ast/call.rs +++ b/crates/nu-protocol/src/ast/call.rs @@ -154,30 +154,10 @@ impl Call { }) } - pub fn positional_iter_mut(&mut self) -> impl Iterator { - self.arguments - .iter_mut() - .take_while(|arg| match arg { - Argument::Spread(_) => false, // Don't include positional arguments given to rest parameter - _ => true, - }) - .filter_map(|arg| match arg { - Argument::Named(_) => None, - 
Argument::Positional(positional) => Some(positional), - Argument::Unknown(unknown) => Some(unknown), - Argument::Spread(_) => None, - }) - } - pub fn positional_nth(&self, i: usize) -> Option<&Expression> { self.positional_iter().nth(i) } - // TODO this method is never used. Delete? - pub fn positional_nth_mut(&mut self, i: usize) -> Option<&mut Expression> { - self.positional_iter_mut().nth(i) - } - pub fn positional_len(&self) -> usize { self.positional_iter().count() } From 6f3dbc97bb77296b68e6da8a9c09d1a5ce5e9003 Mon Sep 17 00:00:00 2001 From: Jack Wright <56345+ayax79@users.noreply.github.com> Date: Wed, 15 May 2024 14:55:07 -0700 Subject: [PATCH 038/137] fixed syntax shape requirements for --quantiles option for polars summary (#12878) Fix for #12730 All of the code expected a list of floats, but the syntax shape expected a table. Resolved by changing the syntax shape to list of floats. cc: @maxim-uvarov --- crates/nu_plugin_polars/src/dataframe/eager/summary.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs index 8e5151837d..2b22d73bfb 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/summary.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/summary.rs @@ -38,7 +38,7 @@ impl PluginCommand for Summary { ) .named( "quantiles", - SyntaxShape::Table(vec![]), + SyntaxShape::List(Box::new(SyntaxShape::Float)), "provide optional quantiles", Some('q'), ) From e20113a0eb8622e27f2ed203f2b628c3d2f7e5f5 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Wed, 15 May 2024 22:59:10 +0000 Subject: [PATCH 039/137] Remove stack debug assert (#12861) # Description In order for `Stack::unwrap_unique` to work as intended, we currently manually track all references to the parent stack and ensure that they are cleared before calling `Stack::unwrap_unique` in the REPL. We also only call `Stack::unwrap_unique` after all code from the current REPL entry has finished executing. Since `Value`s cannot store `Stack` references, then this should have worked in theory. However, we forgot to account for threads. `run-external` (and maybe the plugin writers) can spawn threads that clone the `Stack`, holding on to references of the parent stack. These threads are not waited/joined upon, and so may finish after the eval has already returned. This PR removes the `Stack::unwrap_unique` function and associated debug assert that was [causing panics](https://gist.github.com/cablehead/f3d2608a1629e607c2d75290829354f7) like @cablehead found. # After Submitting Make values cheaper to clone as a more robust solution to the performance issues with cloning the stack. --------- Co-authored-by: Wind --- crates/nu-cli/src/repl.rs | 4 +++- crates/nu-protocol/src/engine/stack.rs | 32 ++++++-------------------- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/crates/nu-cli/src/repl.rs b/crates/nu-cli/src/repl.rs index 2482a7920e..29c2f62734 100644 --- a/crates/nu-cli/src/repl.rs +++ b/crates/nu-cli/src/repl.rs @@ -542,7 +542,9 @@ fn loop_iteration(ctx: LoopContext) -> (bool, Stack, Reedline) { let shell_integration_osc633 = config.shell_integration_osc633; let shell_integration_reset_application_mode = config.shell_integration_reset_application_mode; - let mut stack = Stack::unwrap_unique(stack_arc); + // TODO: we may clone the stack, this can lead to major performance issues + // so we should avoid it or making stack cheaper to clone. 
+ let mut stack = Arc::unwrap_or_clone(stack_arc); perf( "line_editor setup", diff --git a/crates/nu-protocol/src/engine/stack.rs b/crates/nu-protocol/src/engine/stack.rs index 65f2981fbf..19726db9c0 100644 --- a/crates/nu-protocol/src/engine/stack.rs +++ b/crates/nu-protocol/src/engine/stack.rs @@ -75,26 +75,10 @@ impl Stack { } } - /// Unwrap a uniquely-owned stack. - /// - /// In debug mode, this panics if there are multiple references. - /// In production this will instead clone the underlying stack. - pub fn unwrap_unique(stack_arc: Arc) -> Stack { - // If you hit an error here, it's likely that you created an extra - // Arc pointing to the stack somewhere. Make sure that it gets dropped before - // getting here! - Arc::try_unwrap(stack_arc).unwrap_or_else(|arc| { - // in release mode, we clone the stack, but this can lead to - // major performance issues, so we should avoid it - debug_assert!(false, "More than one stack reference remaining!"); - (*arc).clone() - }) - } - /// Create a new child stack from a parent. /// /// Changes from this child can be merged back into the parent with - /// Stack::with_changes_from_child + /// [`Stack::with_changes_from_child`] pub fn with_parent(parent: Arc) -> Stack { Stack { // here we are still cloning environment variable-related information @@ -109,19 +93,17 @@ impl Stack { } } - /// Take an Arc of a parent (assumed to be unique), and a child, and apply - /// all the changes from a child back to the parent. + /// Take an [`Arc`] parent, and a child, and apply all the changes from a child back to the parent. /// - /// Here it is assumed that child was created with a call to Stack::with_parent - /// with parent + /// Here it is assumed that `child` was created by a call to [`Stack::with_parent`] with `parent`. + /// + /// For this to be performant and not clone `parent`, `child` should be the only other + /// referencer of `parent`. pub fn with_changes_from_child(parent: Arc, child: Stack) -> Stack { // we're going to drop the link to the parent stack on our new stack // so that we can unwrap the Arc as a unique reference - // - // This makes the new_stack be in a bit of a weird state, so we shouldn't call - // any structs drop(child.parent_stack); - let mut unique_stack = Stack::unwrap_unique(parent); + let mut unique_stack = Arc::unwrap_or_clone(parent); unique_stack .vars From 1b8eb23785be7efdb7b1511416da77959ee6561b Mon Sep 17 00:00:00 2001 From: Wind Date: Thu, 16 May 2024 16:50:29 +0800 Subject: [PATCH 040/137] allow passing float value to custom command (#12879) # Description Fixes: #12691 In `parse_short_flag`, it only checks special cases for `SyntaxShape::Int`, `SyntaxShape::Number` to allow a flag to be a number. This pr adds `SyntaxShape::Float` to allow a flag to be float number. # User-Facing Changes This is possible after this pr: ```nushell def spam [val: float] { $val }; spam -1.4 ``` # Tests + Formatting Added 1 test --- crates/nu-parser/src/parser.rs | 2 +- tests/repl/test_custom_commands.rs | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 3416e538df..0f72132d10 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -486,7 +486,7 @@ fn parse_short_flags( && matches!( sig.get_positional(positional_idx), Some(PositionalArg { - shape: SyntaxShape::Int | SyntaxShape::Number, + shape: SyntaxShape::Int | SyntaxShape::Number | SyntaxShape::Float, .. 
}) ) diff --git a/tests/repl/test_custom_commands.rs b/tests/repl/test_custom_commands.rs index 43a24fa250..1f36b0e90c 100644 --- a/tests/repl/test_custom_commands.rs +++ b/tests/repl/test_custom_commands.rs @@ -274,3 +274,9 @@ fn dont_allow_implicit_casting_between_glob_and_string() -> TestResult { "can't convert", ) } + +#[test] +fn allow_pass_negative_float() -> TestResult { + run_test("def spam [val: float] { $val }; spam -1.4", "-1.4")?; + run_test("def spam [val: float] { $val }; spam -2", "-2") +} From 6fd854ed9f25070e1f5456b4c96d76d5283f188d Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 16 May 2024 14:11:18 +0000 Subject: [PATCH 041/137] Replace `ExternalStream` with new `ByteStream` type (#12774) # Description This PR introduces a `ByteStream` type which is a `Read`-able stream of bytes. Internally, it has an enum over three different byte stream sources: ```rust pub enum ByteStreamSource { Read(Box), File(File), Child(ChildProcess), } ``` This is in comparison to the current `RawStream` type, which is an `Iterator>` and has to allocate for each read chunk. Currently, `PipelineData::ExternalStream` serves a weird dual role where it is either external command output or a wrapper around `RawStream`. `ByteStream` makes this distinction more clear (via `ByteStreamSource`) and replaces `PipelineData::ExternalStream` in this PR: ```rust pub enum PipelineData { Empty, Value(Value, Option), ListStream(ListStream, Option), ByteStream(ByteStream, Option), } ``` The PR is relatively large, but a decent amount of it is just repetitive changes. This PR fixes #7017, fixes #10763, and fixes #12369. This PR also improves performance when piping external commands. Nushell should, in most cases, have competitive pipeline throughput compared to, e.g., bash. | Command | Before (MB/s) | After (MB/s) | Bash (MB/s) | | -------------------------------------------------- | -------------:| ------------:| -----------:| | `throughput \| rg 'x'` | 3059 | 3744 | 3739 | | `throughput \| nu --testbin relay o> /dev/null` | 3508 | 8087 | 8136 | # User-Facing Changes - This is a breaking change for the plugin communication protocol, because the `ExternalStreamInfo` was replaced with `ByteStreamInfo`. Plugins now only have to deal with a single input stream, as opposed to the previous three streams: stdout, stderr, and exit code. - The output of `describe` has been changed for external/byte streams. - Temporary breaking change: `bytes starts-with` no longer works with byte streams. This is to keep the PR smaller, and `bytes ends-with` already does not work on byte streams. - If a process core dumped, then instead of having a `Value::Error` in the `exit_code` column of the output returned from `complete`, it now is a `Value::Int` with the negation of the signal number. # After Submitting - Update docs and book as necessary - Release notes (e.g., plugin protocol changes) - Adapt/convert commands to work with byte streams (high priority is `str length`, `bytes starts-with`, and maybe `bytes ends-with`). - Refactor the `tee` code, Devyn has already done some work on this. 
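To make the shape of the new type from the description above concrete, here is a much-simplified standalone model of the "one `Read`-able stream, several sources" idea. It is not the actual `nu-protocol` code: the real `ByteStream` also carries a span, interrupt handling, and a stream type, and its child variant wraps a `ChildProcess` rather than a bare `ChildStdout`.

```rust
use std::fs::File;
use std::io::{self, Read};
use std::process::ChildStdout;

// Simplified model: one enum over the possible byte sources, read through a
// single `Read` implementation so consumers never care where the bytes come from.
enum ByteStreamSource {
    Read(Box<dyn Read + Send>),
    File(File),
    Child(ChildStdout),
}

impl Read for ByteStreamSource {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self {
            ByteStreamSource::Read(reader) => reader.read(buf),
            ByteStreamSource::File(file) => file.read(buf),
            ByteStreamSource::Child(stdout) => stdout.read(buf),
        }
    }
}

// Draining a stream is then just `io::copy`, reusing one buffer instead of
// allocating a `Vec` per chunk as the old `RawStream` iterator did.
fn drain(mut source: ByteStreamSource, sink: &mut impl io::Write) -> io::Result<u64> {
    io::copy(&mut source, sink)
}

fn main() -> io::Result<()> {
    let source = ByteStreamSource::Read(Box::new(io::Cursor::new(b"hello\n".to_vec())));
    let copied = drain(source, &mut io::stdout())?;
    eprintln!("copied {copied} bytes");
    Ok(())
}
```

Modelling the source as an enum rather than always boxing a `dyn Read` keeps the file and child cases free to expose extra capabilities (such as a known length or an exit status) while still being readable through one uniform interface.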
--------- Co-authored-by: Devyn Cairns --- Cargo.lock | 1 + crates/nu-cli/src/completions/completer.rs | 5 +- .../src/completions/custom_completions.rs | 84 +- crates/nu-cli/src/config_files.rs | 5 +- crates/nu-cli/src/eval_cmds.rs | 8 +- crates/nu-cli/src/eval_file.rs | 23 +- crates/nu-cli/src/menus/menu_completions.rs | 3 +- crates/nu-cli/src/util.rs | 144 +- .../src/dataframe/eager/cast.rs | 2 +- .../src/dataframe/eager/filter_with.rs | 3 +- .../src/dataframe/eager/first.rs | 2 +- .../src/dataframe/eager/last.rs | 2 +- .../src/dataframe/eager/rename.rs | 3 +- .../src/dataframe/eager/to_nu.rs | 2 +- .../src/dataframe/eager/with_column.rs | 3 +- .../expressions/expressions_macro.rs | 4 +- .../src/dataframe/expressions/otherwise.rs | 2 +- .../src/dataframe/expressions/quantile.rs | 2 +- .../src/dataframe/expressions/when.rs | 2 +- .../src/dataframe/lazy/explode.rs | 2 +- .../src/dataframe/lazy/fill_nan.rs | 2 +- .../src/dataframe/lazy/fill_null.rs | 2 +- .../src/dataframe/lazy/join.rs | 2 +- .../src/dataframe/lazy/quantile.rs | 2 +- .../src/dataframe/series/masks/is_not_null.rs | 2 +- .../src/dataframe/series/masks/is_null.rs | 2 +- .../src/dataframe/series/n_unique.rs | 2 +- .../src/dataframe/series/shift.rs | 3 +- .../src/dataframe/series/unique.rs | 3 +- .../src/dataframe/test_dataframe.rs | 3 +- .../src/dataframe/values/nu_dataframe/mod.rs | 2 +- .../src/dataframe/values/nu_expression/mod.rs | 2 +- .../src/dataframe/values/nu_lazyframe/mod.rs | 2 +- .../dataframe/values/nu_lazygroupby/mod.rs | 2 +- crates/nu-cmd-extra/src/extra/bits/into.rs | 22 +- .../src/extra/filters/each_while.rs | 50 +- .../src/extra/filters/roll/roll_down.rs | 2 +- .../src/extra/filters/roll/roll_left.rs | 2 +- .../src/extra/filters/roll/roll_right.rs | 2 +- .../src/extra/filters/roll/roll_up.rs | 2 +- .../src/extra/filters/update_cells.rs | 2 +- .../src/extra/strings/format/command.rs | 2 +- .../tests/commands/bytes/starts_with.rs | 160 +-- .../nu-cmd-lang/src/core_commands/collect.rs | 2 +- .../nu-cmd-lang/src/core_commands/describe.rs | 77 +- crates/nu-cmd-lang/src/core_commands/do_.rs | 181 ++- crates/nu-cmd-lang/src/core_commands/for_.rs | 30 +- crates/nu-cmd-lang/src/core_commands/let_.rs | 2 +- crates/nu-cmd-lang/src/core_commands/loop_.rs | 12 +- crates/nu-cmd-lang/src/core_commands/mut_.rs | 2 +- crates/nu-cmd-lang/src/core_commands/try_.rs | 7 +- .../nu-cmd-lang/src/core_commands/while_.rs | 18 +- crates/nu-cmd-lang/src/example_support.rs | 7 +- crates/nu-color-config/src/style_computer.rs | 6 +- crates/nu-command/src/bytes/starts_with.rs | 64 +- crates/nu-command/src/charting/histogram.rs | 2 +- .../nu-command/src/conversions/into/binary.rs | 28 +- .../src/conversions/into/cell_path.rs | 6 +- .../nu-command/src/conversions/into/glob.rs | 20 +- .../nu-command/src/conversions/into/record.rs | 2 +- .../nu-command/src/conversions/into/string.rs | 30 +- .../nu-command/src/database/values/sqlite.rs | 2 +- crates/nu-command/src/debug/inspect.rs | 2 +- crates/nu-command/src/debug/timeit.rs | 5 +- crates/nu-command/src/filesystem/open.rs | 30 +- crates/nu-command/src/filesystem/save.rs | 240 ++-- crates/nu-command/src/filters/columns.rs | 8 +- crates/nu-command/src/filters/drop/column.rs | 6 +- crates/nu-command/src/filters/each.rs | 64 +- crates/nu-command/src/filters/empty.rs | 38 +- crates/nu-command/src/filters/filter.rs | 63 +- crates/nu-command/src/filters/find.rs | 72 +- crates/nu-command/src/filters/first.rs | 6 +- crates/nu-command/src/filters/get.rs | 2 +- crates/nu-command/src/filters/group_by.rs | 2 
+- crates/nu-command/src/filters/headers.rs | 2 +- crates/nu-command/src/filters/insert.rs | 10 +- crates/nu-command/src/filters/items.rs | 25 +- crates/nu-command/src/filters/join.rs | 2 +- crates/nu-command/src/filters/last.rs | 14 +- crates/nu-command/src/filters/lines.rs | 175 +-- crates/nu-command/src/filters/par_each.rs | 102 +- crates/nu-command/src/filters/reduce.rs | 2 +- crates/nu-command/src/filters/reject.rs | 2 +- crates/nu-command/src/filters/skip/skip_.rs | 7 +- .../nu-command/src/filters/skip/skip_until.rs | 3 +- .../nu-command/src/filters/skip/skip_while.rs | 3 +- crates/nu-command/src/filters/take/take_.rs | 14 +- .../nu-command/src/filters/take/take_until.rs | 3 +- .../nu-command/src/filters/take/take_while.rs | 3 +- crates/nu-command/src/filters/tee.rs | 460 +++++-- crates/nu-command/src/filters/update.rs | 8 +- crates/nu-command/src/filters/upsert.rs | 18 +- crates/nu-command/src/filters/utils.rs | 2 +- crates/nu-command/src/filters/values.rs | 8 +- crates/nu-command/src/filters/where_.rs | 11 +- crates/nu-command/src/filters/wrap.rs | 4 +- crates/nu-command/src/formats/from/json.rs | 2 +- crates/nu-command/src/formats/from/msgpack.rs | 73 +- .../nu-command/src/formats/from/msgpackz.rs | 22 +- crates/nu-command/src/formats/from/ods.rs | 40 +- crates/nu-command/src/formats/from/xlsx.rs | 38 +- crates/nu-command/src/formats/to/delimited.rs | 2 +- crates/nu-command/src/formats/to/json.rs | 2 +- crates/nu-command/src/formats/to/msgpack.rs | 2 +- crates/nu-command/src/formats/to/msgpackz.rs | 2 +- crates/nu-command/src/formats/to/nuon.rs | 2 +- crates/nu-command/src/formats/to/text.rs | 79 +- crates/nu-command/src/formats/to/toml.rs | 2 +- crates/nu-command/src/formats/to/xml.rs | 2 +- crates/nu-command/src/formats/to/yaml.rs | 2 +- crates/nu-command/src/generators/generate.rs | 18 +- crates/nu-command/src/hash/generic_digest.rs | 60 +- crates/nu-command/src/misc/tutor.rs | 10 +- crates/nu-command/src/network/http/client.rs | 39 +- crates/nu-command/src/network/url/parse.rs | 2 +- crates/nu-command/src/path/join.rs | 4 +- crates/nu-command/src/progress_bar.rs | 12 - .../src/strings/encode_decode/decode.rs | 12 +- .../src/strings/encode_decode/encode.rs | 9 +- crates/nu-command/src/strings/parse.rs | 37 +- crates/nu-command/src/system/complete.rs | 98 +- crates/nu-command/src/system/nu_check.rs | 16 +- crates/nu-command/src/system/run_external.rs | 369 ++--- crates/nu-command/src/viewers/table.rs | 47 +- .../tests/format_conversions/csv.rs | 1 + .../tests/format_conversions/tsv.rs | 1 + crates/nu-engine/src/documentation.rs | 4 +- crates/nu-engine/src/env.rs | 17 +- crates/nu-engine/src/eval.rs | 118 +- crates/nu-explore/src/nu_common/value.rs | 78 +- crates/nu-plugin-core/src/interface/mod.rs | 164 +-- crates/nu-plugin-core/src/interface/tests.rs | 209 +-- crates/nu-plugin-engine/src/context.rs | 2 +- crates/nu-plugin-engine/src/init.rs | 2 +- crates/nu-plugin-engine/src/interface/mod.rs | 10 +- .../nu-plugin-engine/src/interface/tests.rs | 61 +- crates/nu-plugin-protocol/src/lib.rs | 59 +- crates/nu-plugin-test-support/src/lib.rs | 2 +- .../nu-plugin-test-support/src/plugin_test.rs | 65 +- .../tests/custom_value/mod.rs | 2 +- .../nu-plugin-test-support/tests/hello/mod.rs | 2 +- .../tests/lowercase/mod.rs | 2 +- crates/nu-plugin/src/plugin/command.rs | 2 +- crates/nu-plugin/src/plugin/interface/mod.rs | 6 +- .../nu-plugin/src/plugin/interface/tests.rs | 20 +- crates/nu-plugin/src/plugin/mod.rs | 2 +- crates/nu-protocol/Cargo.toml | 4 + .../src/debugger/debugger_trait.rs | 6 
+- crates/nu-protocol/src/debugger/profiler.rs | 8 +- crates/nu-protocol/src/errors/shell_error.rs | 70 +- crates/nu-protocol/src/eval_const.rs | 7 +- crates/nu-protocol/src/lib.rs | 7 +- .../nu-protocol/src/pipeline/byte_stream.rs | 822 ++++++++++++ .../list_stream.rs | 0 .../{pipeline_data => pipeline}/metadata.rs | 0 crates/nu-protocol/src/pipeline/mod.rs | 11 + .../{pipeline_data => pipeline}/out_dest.rs | 8 +- .../nu-protocol/src/pipeline/pipeline_data.rs | 725 ++++++++++ crates/nu-protocol/src/pipeline_data/mod.rs | 1185 ----------------- .../src/pipeline_data/raw_stream.rs | 176 --- crates/nu-protocol/src/process/child.rs | 294 ++++ crates/nu-protocol/src/process/exit_status.rs | 64 + crates/nu-protocol/src/process/mod.rs | 5 + crates/nu-protocol/src/util.rs | 52 - .../nu-protocol/tests/test_pipeline_data.rs | 2 +- crates/nu-system/src/foreground.rs | 6 +- .../{collect_external.rs => collect_bytes.rs} | 39 +- crates/nu_plugin_example/src/commands/mod.rs | 4 +- crates/nu_plugin_example/src/lib.rs | 2 +- crates/nu_plugin_polars/src/cache/rm.rs | 2 +- .../src/dataframe/eager/to_arrow.rs | 2 +- .../src/dataframe/eager/to_avro.rs | 2 +- .../src/dataframe/eager/to_csv.rs | 2 +- .../src/dataframe/eager/to_json_lines.rs | 2 +- .../src/dataframe/eager/to_nu.rs | 2 +- .../src/dataframe/eager/to_parquet.rs | 2 +- .../expressions/expressions_macro.rs | 4 +- .../src/dataframe/expressions/is_in.rs | 3 +- .../src/dataframe/expressions/otherwise.rs | 2 +- .../src/dataframe/expressions/when.rs | 2 +- .../src/dataframe/lazy/cast.rs | 2 +- .../src/dataframe/lazy/collect.rs | 2 +- .../src/dataframe/lazy/explode.rs | 11 +- .../src/dataframe/lazy/fetch.rs | 2 +- .../src/dataframe/lazy/fill_nan.rs | 2 +- .../src/dataframe/lazy/fill_null.rs | 2 +- .../src/dataframe/lazy/filter.rs | 2 +- .../src/dataframe/lazy/filter_with.rs | 2 +- .../src/dataframe/lazy/first.rs | 2 +- .../src/dataframe/lazy/groupby.rs | 2 +- .../src/dataframe/lazy/join.rs | 2 +- .../src/dataframe/lazy/last.rs | 2 +- .../src/dataframe/lazy/median.rs | 2 +- .../src/dataframe/lazy/quantile.rs | 2 +- .../src/dataframe/lazy/rename.rs | 2 +- .../src/dataframe/lazy/select.rs | 2 +- .../src/dataframe/lazy/sort_by_expr.rs | 2 +- .../src/dataframe/lazy/with_column.rs | 2 +- .../src/dataframe/series/masks/is_not_null.rs | 3 +- .../src/dataframe/series/masks/is_null.rs | 3 +- .../src/dataframe/series/n_unique.rs | 3 +- .../src/dataframe/series/shift.rs | 2 +- .../src/dataframe/series/unique.rs | 2 +- .../src/dataframe/values/mod.rs | 6 +- .../src/dataframe/values/nu_dataframe/mod.rs | 2 +- .../src/dataframe/values/nu_lazyframe/mod.rs | 2 +- src/main.rs | 29 +- tests/plugins/stream.rs | 21 +- tests/shell/pipeline/commands/internal.rs | 4 +- 210 files changed, 3955 insertions(+), 4012 deletions(-) create mode 100644 crates/nu-protocol/src/pipeline/byte_stream.rs rename crates/nu-protocol/src/{pipeline_data => pipeline}/list_stream.rs (100%) rename crates/nu-protocol/src/{pipeline_data => pipeline}/metadata.rs (100%) create mode 100644 crates/nu-protocol/src/pipeline/mod.rs rename crates/nu-protocol/src/{pipeline_data => pipeline}/out_dest.rs (81%) create mode 100644 crates/nu-protocol/src/pipeline/pipeline_data.rs delete mode 100644 crates/nu-protocol/src/pipeline_data/mod.rs delete mode 100644 crates/nu-protocol/src/pipeline_data/raw_stream.rs create mode 100644 crates/nu-protocol/src/process/child.rs create mode 100644 crates/nu-protocol/src/process/exit_status.rs create mode 100644 crates/nu-protocol/src/process/mod.rs delete mode 100644 
crates/nu-protocol/src/util.rs rename crates/nu_plugin_example/src/commands/{collect_external.rs => collect_bytes.rs} (56%) diff --git a/Cargo.lock b/Cargo.lock index 656f859a63..5a9b95cebd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3254,6 +3254,7 @@ dependencies = [ "indexmap", "lru", "miette", + "nix", "nu-path", "nu-system", "nu-test-support", diff --git a/crates/nu-cli/src/completions/completer.rs b/crates/nu-cli/src/completions/completer.rs index 348111f009..007a0e288a 100644 --- a/crates/nu-cli/src/completions/completer.rs +++ b/crates/nu-cli/src/completions/completer.rs @@ -103,9 +103,8 @@ impl NuCompleter { PipelineData::empty(), ); - match result { - Ok(pd) => { - let value = pd.into_value(span); + match result.and_then(|data| data.into_value(span)) { + Ok(value) => { if let Value::List { vals, .. } = value { let result = map_value_completions(vals.iter(), Span::new(span.start, span.end), offset); diff --git a/crates/nu-cli/src/completions/custom_completions.rs b/crates/nu-cli/src/completions/custom_completions.rs index d2ccd5191d..17c8e6a924 100644 --- a/crates/nu-cli/src/completions/custom_completions.rs +++ b/crates/nu-cli/src/completions/custom_completions.rs @@ -74,55 +74,53 @@ impl Completer for CustomCompletion { // Parse result let suggestions = result - .map(|pd| { - let value = pd.into_value(span); - match &value { - Value::Record { val, .. } => { - let completions = val - .get("completions") - .and_then(|val| { - val.as_list() - .ok() - .map(|it| map_value_completions(it.iter(), span, offset)) - }) - .unwrap_or_default(); - let options = val.get("options"); + .and_then(|data| data.into_value(span)) + .map(|value| match &value { + Value::Record { val, .. } => { + let completions = val + .get("completions") + .and_then(|val| { + val.as_list() + .ok() + .map(|it| map_value_completions(it.iter(), span, offset)) + }) + .unwrap_or_default(); + let options = val.get("options"); - if let Some(Value::Record { val: options, .. }) = &options { - let should_sort = options - .get("sort") - .and_then(|val| val.as_bool().ok()) - .unwrap_or(false); + if let Some(Value::Record { val: options, .. }) = &options { + let should_sort = options + .get("sort") + .and_then(|val| val.as_bool().ok()) + .unwrap_or(false); - if should_sort { - self.sort_by = SortBy::Ascending; - } - - custom_completion_options = Some(CompletionOptions { - case_sensitive: options - .get("case_sensitive") - .and_then(|val| val.as_bool().ok()) - .unwrap_or(true), - positional: options - .get("positional") - .and_then(|val| val.as_bool().ok()) - .unwrap_or(true), - match_algorithm: match options.get("completion_algorithm") { - Some(option) => option - .coerce_string() - .ok() - .and_then(|option| option.try_into().ok()) - .unwrap_or(MatchAlgorithm::Prefix), - None => completion_options.match_algorithm, - }, - }); + if should_sort { + self.sort_by = SortBy::Ascending; } - completions + custom_completion_options = Some(CompletionOptions { + case_sensitive: options + .get("case_sensitive") + .and_then(|val| val.as_bool().ok()) + .unwrap_or(true), + positional: options + .get("positional") + .and_then(|val| val.as_bool().ok()) + .unwrap_or(true), + match_algorithm: match options.get("completion_algorithm") { + Some(option) => option + .coerce_string() + .ok() + .and_then(|option| option.try_into().ok()) + .unwrap_or(MatchAlgorithm::Prefix), + None => completion_options.match_algorithm, + }, + }); } - Value::List { vals, .. 
} => map_value_completions(vals.iter(), span, offset), - _ => vec![], + + completions } + Value::List { vals, .. } => map_value_completions(vals.iter(), span, offset), + _ => vec![], }) .unwrap_or_default(); diff --git a/crates/nu-cli/src/config_files.rs b/crates/nu-cli/src/config_files.rs index e89fa6c1d1..ec7ad2f412 100644 --- a/crates/nu-cli/src/config_files.rs +++ b/crates/nu-cli/src/config_files.rs @@ -306,14 +306,15 @@ pub fn migrate_old_plugin_file(engine_state: &EngineState, storage_path: &str) - let mut engine_state = engine_state.clone(); let mut stack = Stack::new(); - if !eval_source( + if eval_source( &mut engine_state, &mut stack, &old_contents, &old_plugin_file_path.to_string_lossy(), PipelineData::Empty, false, - ) { + ) != 0 + { return false; } diff --git a/crates/nu-cli/src/eval_cmds.rs b/crates/nu-cli/src/eval_cmds.rs index 0b0b5f8ddf..8fa3bf30e5 100644 --- a/crates/nu-cli/src/eval_cmds.rs +++ b/crates/nu-cli/src/eval_cmds.rs @@ -1,5 +1,4 @@ use log::info; -use miette::Result; use nu_engine::{convert_env_values, eval_block}; use nu_parser::parse; use nu_protocol::{ @@ -59,9 +58,10 @@ pub fn evaluate_commands( t_mode.coerce_str()?.parse().unwrap_or_default(); } - let exit_code = pipeline.print(engine_state, stack, no_newline, false)?; - if exit_code != 0 { - std::process::exit(exit_code as i32); + if let Some(status) = pipeline.print(engine_state, stack, no_newline, false)? { + if status.code() != 0 { + std::process::exit(status.code()) + } } info!("evaluate {}:{}:{}", file!(), line!(), column!()); diff --git a/crates/nu-cli/src/eval_file.rs b/crates/nu-cli/src/eval_file.rs index 7483c6bc33..ff6ba36fe3 100644 --- a/crates/nu-cli/src/eval_file.rs +++ b/crates/nu-cli/src/eval_file.rs @@ -96,7 +96,7 @@ pub fn evaluate_file( engine_state.merge_delta(working_set.delta)?; // Check if the file contains a main command. - if engine_state.find_decl(b"main", &[]).is_some() { + let exit_code = if engine_state.find_decl(b"main", &[]).is_some() { // Evaluate the file, but don't run main yet. let pipeline = match eval_block::(engine_state, stack, &block, PipelineData::empty()) { @@ -109,26 +109,29 @@ pub fn evaluate_file( }; // Print the pipeline output of the last command of the file. - let exit_code = pipeline.print(engine_state, stack, true, false)?; - if exit_code != 0 { - std::process::exit(exit_code as i32); + if let Some(status) = pipeline.print(engine_state, stack, true, false)? { + if status.code() != 0 { + std::process::exit(status.code()) + } } // Invoke the main command with arguments. // Arguments with whitespaces are quoted, thus can be safely concatenated by whitespace. 
let args = format!("main {}", args.join(" ")); - if !eval_source( + eval_source( engine_state, stack, args.as_bytes(), "", input, true, - ) { - std::process::exit(1); - } - } else if !eval_source(engine_state, stack, &file, file_path_str, input, true) { - std::process::exit(1); + ) + } else { + eval_source(engine_state, stack, &file, file_path_str, input, true) + }; + + if exit_code != 0 { + std::process::exit(exit_code) } info!("evaluate {}:{}:{}", file!(), line!(), column!()); diff --git a/crates/nu-cli/src/menus/menu_completions.rs b/crates/nu-cli/src/menus/menu_completions.rs index fbfc598225..c65f0bd100 100644 --- a/crates/nu-cli/src/menus/menu_completions.rs +++ b/crates/nu-cli/src/menus/menu_completions.rs @@ -59,8 +59,7 @@ impl Completer for NuMenuCompleter { let res = eval_block::(&self.engine_state, &mut self.stack, block, input); - if let Ok(values) = res { - let values = values.into_value(self.span); + if let Ok(values) = res.and_then(|data| data.into_value(self.span)) { convert_to_suggestions(values, line, pos, self.only_buffer_difference) } else { Vec::new() diff --git a/crates/nu-cli/src/util.rs b/crates/nu-cli/src/util.rs index 2f996691c9..7ebea0deb2 100644 --- a/crates/nu-cli/src/util.rs +++ b/crates/nu-cli/src/util.rs @@ -4,7 +4,7 @@ use nu_parser::{escape_quote_string, lex, parse, unescape_unquote_string, Token, use nu_protocol::{ debugger::WithoutDebug, engine::{EngineState, Stack, StateWorkingSet}, - print_if_stream, report_error, report_error_new, PipelineData, ShellError, Span, Value, + report_error, report_error_new, PipelineData, ShellError, Span, Value, }; #[cfg(windows)] use nu_utils::enable_vt_processing; @@ -206,9 +206,48 @@ pub fn eval_source( fname: &str, input: PipelineData, allow_return: bool, -) -> bool { +) -> i32 { let start_time = std::time::Instant::now(); + let exit_code = match evaluate_source(engine_state, stack, source, fname, input, allow_return) { + Ok(code) => code.unwrap_or(0), + Err(err) => { + report_error_new(engine_state, &err); + 1 + } + }; + + stack.add_env_var( + "LAST_EXIT_CODE".to_string(), + Value::int(exit_code.into(), Span::unknown()), + ); + + // reset vt processing, aka ansi because illbehaved externals can break it + #[cfg(windows)] + { + let _ = enable_vt_processing(); + } + + perf( + &format!("eval_source {}", &fname), + start_time, + file!(), + line!(), + column!(), + engine_state.get_config().use_ansi_coloring, + ); + + exit_code +} + +fn evaluate_source( + engine_state: &mut EngineState, + stack: &mut Stack, + source: &[u8], + fname: &str, + input: PipelineData, + allow_return: bool, +) -> Result, ShellError> { let (block, delta) = { let mut working_set = StateWorkingSet::new(engine_state); let output = parse( @@ -222,97 +261,40 @@ pub fn eval_source( } if let Some(err) = working_set.parse_errors.first() { - set_last_exit_code(stack, 1); report_error(&working_set, err); - return false; + return Ok(Some(1)); } (output, working_set.render()) }; - if let Err(err) = engine_state.merge_delta(delta) { - set_last_exit_code(stack, 1); - report_error_new(engine_state, &err); - return false; - } + engine_state.merge_delta(delta)?; - let b = if allow_return { + let pipeline = if allow_return { eval_block_with_early_return::(engine_state, stack, &block, input) } else { eval_block::(engine_state, stack, &block, input) + }?; + + let status = if let PipelineData::ByteStream(stream, ..) = pipeline { + stream.print(false)? 
+ } else { + if let Some(hook) = engine_state.get_config().hooks.display_output.clone() { + let pipeline = eval_hook( + engine_state, + stack, + Some(pipeline), + vec![], + &hook, + "display_output", + )?; + pipeline.print(engine_state, stack, false, false) + } else { + pipeline.print(engine_state, stack, true, false) + }? }; - match b { - Ok(pipeline_data) => { - let config = engine_state.get_config(); - let result; - if let PipelineData::ExternalStream { - stdout: stream, - stderr: stderr_stream, - exit_code, - .. - } = pipeline_data - { - result = print_if_stream(stream, stderr_stream, false, exit_code); - } else if let Some(hook) = config.hooks.display_output.clone() { - match eval_hook( - engine_state, - stack, - Some(pipeline_data), - vec![], - &hook, - "display_output", - ) { - Err(err) => { - result = Err(err); - } - Ok(val) => { - result = val.print(engine_state, stack, false, false); - } - } - } else { - result = pipeline_data.print(engine_state, stack, true, false); - } - - match result { - Err(err) => { - report_error_new(engine_state, &err); - return false; - } - Ok(exit_code) => { - set_last_exit_code(stack, exit_code); - } - } - - // reset vt processing, aka ansi because illbehaved externals can break it - #[cfg(windows)] - { - let _ = enable_vt_processing(); - } - } - Err(err) => { - set_last_exit_code(stack, 1); - report_error_new(engine_state, &err); - return false; - } - } - perf( - &format!("eval_source {}", &fname), - start_time, - file!(), - line!(), - column!(), - engine_state.get_config().use_ansi_coloring, - ); - - true -} - -fn set_last_exit_code(stack: &mut Stack, exit_code: i64) { - stack.add_env_var( - "LAST_EXIT_CODE".to_string(), - Value::int(exit_code, Span::unknown()), - ); + Ok(status.map(|status| status.code())) } #[cfg(test)] diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs index c170f8db8a..be9c33a229 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs @@ -79,7 +79,7 @@ impl Command for CastDF { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) { let (dtype, column_nm) = df_args(engine_state, stack, call)?; let df = NuLazyFrame::try_from_value(value)?; diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs index 3793945181..e0e94d10a0 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs @@ -72,8 +72,7 @@ impl Command for FilterWith { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value(value)?; command_lazy(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs index 70160b3005..14c86e8c40 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs @@ -86,7 +86,7 @@ impl Command for FirstDF { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let df = NuDataFrame::try_from_value(value)?; 
command(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs index a0a188471d..ff2c4f98a2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs @@ -61,7 +61,7 @@ impl Command for LastDF { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let df = NuDataFrame::try_from_value(value)?; command(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs index 5167a0c968..0cb75f34f2 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs @@ -109,8 +109,7 @@ impl Command for RenameDF { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value(value)?; command_lazy(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs index 73dadacb2b..a6ab42052c 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs @@ -76,7 +76,7 @@ impl Command for ToNu { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { dataframe_command(engine_state, stack, call, value) } else { diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs index 52ceefceb4..79d3427e8a 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs @@ -102,8 +102,7 @@ impl Command for WithColumn { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value(value)?; command_lazy(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs index b2d79be010..4cc56e030b 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs @@ -172,7 +172,7 @@ macro_rules! lazy_expr_command { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value(value)?; let lazy = NuLazyFrame::new( @@ -271,7 +271,7 @@ macro_rules! 
lazy_expr_command { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value(value)?; let lazy = NuLazyFrame::new( diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs index 0ba507f97f..eb97c575b7 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs @@ -91,7 +91,7 @@ impl Command for ExprOtherwise { let otherwise_predicate: Value = call.req(engine_state, stack, 0)?; let otherwise_predicate = NuExpression::try_from_value(otherwise_predicate)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let complete: NuExpression = match NuWhen::try_from_value(value)? { NuWhen::Then(then) => then.otherwise(otherwise_predicate.into_polars()).into(), NuWhen::ChainedThen(chained_when) => chained_when diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs index d82a0faf0a..aaa1029ee9 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs @@ -67,7 +67,7 @@ impl Command for ExprQuantile { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let quantile: f64 = call.req(engine_state, stack, 0)?; let expr = NuExpression::try_from_value(value)?; diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs index d70fd00825..5a6aad2de7 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs @@ -103,7 +103,7 @@ impl Command for ExprWhen { let then_predicate: Value = call.req(engine_state, stack, 1)?; let then_predicate = NuExpression::try_from_value(then_predicate)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let when_then: NuWhen = match value { Value::Nothing { .. 
} => when(when_predicate.into_polars()) .then(then_predicate.into_polars()) diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs index 8e32ae8040..a027e84d36 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs @@ -100,7 +100,7 @@ impl Command for LazyExplode { } pub(crate) fn explode(call: &Call, input: PipelineData) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value(value)?; let columns: Vec = call diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs index a9a1eb1590..4c75f1d9a3 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs @@ -82,7 +82,7 @@ impl Command for LazyFillNA { input: PipelineData, ) -> Result { let fill: Value = call.req(engine_state, stack, 0)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuExpression::can_downcast(&value) { let expr = NuExpression::try_from_value(value)?; diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs index b3d35d2b8d..88be2a9e88 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs @@ -59,7 +59,7 @@ impl Command for LazyFillNull { input: PipelineData, ) -> Result { let fill: Value = call.req(engine_state, stack, 0)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuExpression::can_downcast(&value) { let expr = NuExpression::try_from_value(value)?; diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs index 7f7d1ab66b..4ae297acfd 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs @@ -219,7 +219,7 @@ impl Command for LazyJoin { let suffix: Option = call.get_flag(engine_state, stack, "suffix")?; let suffix = suffix.unwrap_or_else(|| "_x".into()); - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value(value)?; let from_eager = lazy.from_eager; let lazy = lazy.into_polars(); diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs index d17a444f49..ac8ec590c6 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs @@ -54,7 +54,7 @@ impl Command for LazyQuantile { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let quantile: f64 = call.req(engine_state, stack, 0)?; let lazy = NuLazyFrame::try_from_value(value)?; diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs index ce66f69877..4ed33ce951 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs @@ -68,7 +68,7 @@ impl Command for IsNotNull { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = 
input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let df = NuDataFrame::try_from_value(value)?; command(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs index d7921da347..b99d48af66 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs @@ -68,7 +68,7 @@ impl Command for IsNull { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let df = NuDataFrame::try_from_value(value)?; command(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs index b23ab4e20d..c6d6e829f8 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs @@ -60,7 +60,7 @@ impl Command for NUnique { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) { let df = NuDataFrame::try_from_value(value)?; command(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs b/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs index bf842840b4..2f40cf0a45 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs @@ -56,8 +56,7 @@ impl Command for Shift { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value(value)?; command_lazy(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs index 13012b4fb3..1bc2e0dc1b 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs @@ -72,8 +72,7 @@ impl Command for Unique { call: &Call, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value(value)?; command_lazy(engine_state, stack, call, df) diff --git a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs index d6febf7e43..39c30be9dd 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs @@ -80,7 +80,8 @@ pub fn test_dataframe_example(engine_state: &mut Box, example: &Exa let result = eval_block::(engine_state, &mut stack, &block, PipelineData::empty()) .unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", example.example, err)) - .into_value(Span::test_data()); + .into_value(Span::test_data()) + .expect("ok value"); println!("input: {}", example.example); println!("result: {result:?}"); diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs index 8b828aee50..967e03580f 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs +++ 
b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs @@ -297,7 +297,7 @@ impl NuDataFrame { } pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(value) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs index 8646cdefd0..cee31d7b53 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs @@ -84,7 +84,7 @@ impl NuExpression { } pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(value) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs index f03d9f0cc8..355516d340 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs @@ -134,7 +134,7 @@ impl NuLazyFrame { } pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(value) } diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs index e942e3be97..e1bcb30069 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs @@ -107,7 +107,7 @@ impl NuLazyGroupBy { } pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(value) } } diff --git a/crates/nu-cmd-extra/src/extra/bits/into.rs b/crates/nu-cmd-extra/src/extra/bits/into.rs index c7fd09b728..cf85f92ac5 100644 --- a/crates/nu-cmd-extra/src/extra/bits/into.rs +++ b/crates/nu-cmd-extra/src/extra/bits/into.rs @@ -118,22 +118,12 @@ fn into_bits( let cell_paths = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - match input { - PipelineData::ExternalStream { stdout: None, .. } => { - Ok(Value::binary(vec![], head).into_pipeline_data()) - } - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - // TODO: in the future, we may want this to stream out, converting each to bytes - let output = stream.into_bytes()?; - Ok(Value::binary(output.item, head).into_pipeline_data()) - } - _ => { - let args = Arguments { cell_paths }; - operate(action, args, input, call.head, engine_state.ctrlc.clone()) - } + if let PipelineData::ByteStream(stream, ..) = input { + // TODO: in the future, we may want this to stream out, converting each to bytes + Ok(Value::binary(stream.into_bytes()?, head).into_pipeline_data()) + } else { + let args = Arguments { cell_paths }; + operate(action, args, input, call.head, engine_state.ctrlc.clone()) } } diff --git a/crates/nu-cmd-extra/src/extra/filters/each_while.rs b/crates/nu-cmd-extra/src/extra/filters/each_while.rs index 939f194f43..58679c8eea 100644 --- a/crates/nu-cmd-extra/src/extra/filters/each_while.rs +++ b/crates/nu-cmd-extra/src/extra/filters/each_while.rs @@ -78,38 +78,40 @@ impl Command for EachWhile { | PipelineData::ListStream(..) 
=> { let mut closure = ClosureEval::new(engine_state, stack, closure); Ok(input - .into_iter() - .map_while(move |value| match closure.run_with_value(value) { - Ok(data) => { - let value = data.into_value(head); - (!value.is_nothing()).then_some(value) - } - Err(_) => None, - }) - .fuse() - .into_pipeline_data(head, engine_state.ctrlc.clone())) - } - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - let mut closure = ClosureEval::new(engine_state, stack, closure); - Ok(stream .into_iter() .map_while(move |value| { - let value = value.ok()?; - match closure.run_with_value(value) { - Ok(data) => { - let value = data.into_value(head); - (!value.is_nothing()).then_some(value) - } + match closure + .run_with_value(value) + .and_then(|data| data.into_value(head)) + { + Ok(value) => (!value.is_nothing()).then_some(value), Err(_) => None, } }) .fuse() .into_pipeline_data(head, engine_state.ctrlc.clone())) } + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + if let Some(chunks) = stream.chunks() { + let mut closure = ClosureEval::new(engine_state, stack, closure); + Ok(chunks + .map_while(move |value| { + let value = value.ok()?; + match closure + .run_with_value(value) + .and_then(|data| data.into_value(span)) + { + Ok(value) => (!value.is_nothing()).then_some(value), + Err(_) => None, + } + }) + .fuse() + .into_pipeline_data(head, engine_state.ctrlc.clone())) + } else { + Ok(PipelineData::Empty) + } + } // This match allows non-iterables to be accepted, // which is currently considered undesirable (Nov 2022). PipelineData::Value(value, ..) => { diff --git a/crates/nu-cmd-extra/src/extra/filters/roll/roll_down.rs b/crates/nu-cmd-extra/src/extra/filters/roll/roll_down.rs index 465b9f1f4c..24ea1bc309 100644 --- a/crates/nu-cmd-extra/src/extra/filters/roll/roll_down.rs +++ b/crates/nu-cmd-extra/src/extra/filters/roll/roll_down.rs @@ -56,7 +56,7 @@ impl Command for RollDown { let by: Option = call.get_flag(engine_state, stack, "by")?; let metadata = input.metadata(); - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let rotated_value = vertical_rotate_value(value, by, VerticalDirection::Down)?; Ok(rotated_value.into_pipeline_data().set_metadata(metadata)) diff --git a/crates/nu-cmd-extra/src/extra/filters/roll/roll_left.rs b/crates/nu-cmd-extra/src/extra/filters/roll/roll_left.rs index ff69f23268..789b70830d 100644 --- a/crates/nu-cmd-extra/src/extra/filters/roll/roll_left.rs +++ b/crates/nu-cmd-extra/src/extra/filters/roll/roll_left.rs @@ -94,7 +94,7 @@ impl Command for RollLeft { let metadata = input.metadata(); let cells_only = call.has_flag(engine_state, stack, "cells-only")?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let rotated_value = horizontal_rotate_value(value, by, cells_only, &HorizontalDirection::Left)?; diff --git a/crates/nu-cmd-extra/src/extra/filters/roll/roll_right.rs b/crates/nu-cmd-extra/src/extra/filters/roll/roll_right.rs index d190960581..55a1e42158 100644 --- a/crates/nu-cmd-extra/src/extra/filters/roll/roll_right.rs +++ b/crates/nu-cmd-extra/src/extra/filters/roll/roll_right.rs @@ -94,7 +94,7 @@ impl Command for RollRight { let metadata = input.metadata(); let cells_only = call.has_flag(engine_state, stack, "cells-only")?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let rotated_value = horizontal_rotate_value(value, by, 
cells_only, &HorizontalDirection::Right)?; diff --git a/crates/nu-cmd-extra/src/extra/filters/roll/roll_up.rs b/crates/nu-cmd-extra/src/extra/filters/roll/roll_up.rs index 1cd74fe247..7b9480599d 100644 --- a/crates/nu-cmd-extra/src/extra/filters/roll/roll_up.rs +++ b/crates/nu-cmd-extra/src/extra/filters/roll/roll_up.rs @@ -56,7 +56,7 @@ impl Command for RollUp { let by: Option = call.get_flag(engine_state, stack, "by")?; let metadata = input.metadata(); - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let rotated_value = vertical_rotate_value(value, by, VerticalDirection::Up)?; Ok(rotated_value.into_pipeline_data().set_metadata(metadata)) diff --git a/crates/nu-cmd-extra/src/extra/filters/update_cells.rs b/crates/nu-cmd-extra/src/extra/filters/update_cells.rs index 9fe9bfe389..c90e933410 100644 --- a/crates/nu-cmd-extra/src/extra/filters/update_cells.rs +++ b/crates/nu-cmd-extra/src/extra/filters/update_cells.rs @@ -152,7 +152,7 @@ impl Iterator for UpdateCellIterator { fn eval_value(closure: &mut ClosureEval, span: Span, value: Value) -> Value { closure .run_with_value(value) - .map(|data| data.into_value(span)) + .and_then(|data| data.into_value(span)) .unwrap_or_else(|err| Value::error(err, span)) } diff --git a/crates/nu-cmd-extra/src/extra/strings/format/command.rs b/crates/nu-cmd-extra/src/extra/strings/format/command.rs index 932b5ccb7f..1c72627779 100644 --- a/crates/nu-cmd-extra/src/extra/strings/format/command.rs +++ b/crates/nu-cmd-extra/src/extra/strings/format/command.rs @@ -39,7 +39,7 @@ impl Command for FormatPattern { let mut working_set = StateWorkingSet::new(engine_state); let specified_pattern: Result = call.req(engine_state, stack, 0); - let input_val = input.into_value(call.head); + let input_val = input.into_value(call.head)?; // add '$it' variable to support format like this: $it.column1.column2. 
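All of the hunks above are the same mechanical migration: `PipelineData::into_value` is now fallible, so callers either propagate the error with `?` or, in per-item positions, fold it back into an error `Value`. A minimal sketch of the two calling patterns, assuming only the `nu_protocol` items visible in these hunks; the helper names are illustrative and not part of the codebase:

```rust
use nu_protocol::{PipelineData, ShellError, Span, Value};

// Propagating style: let the ShellError bubble out of the command's `run`.
fn collect_value(input: PipelineData, head: Span) -> Result<Value, ShellError> {
    let value = input.into_value(head)?; // previously returned a Value directly
    Ok(value)
}

// Folding style: keep a stream alive by turning the failure into an error
// Value, as the `update cells` hunk does for each evaluated cell.
fn collect_or_error(input: PipelineData, span: Span) -> Value {
    input
        .into_value(span)
        .unwrap_or_else(|err| Value::error(err, span))
}
```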
let it_id = working_set.add_variable(b"$it".to_vec(), call.head, Type::Any, false); stack.add_var(it_id, input_val.clone()); diff --git a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs index e7d57698b5..c3ad1ec448 100644 --- a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs +++ b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs @@ -19,102 +19,102 @@ fn basic_string_fails() { assert_eq!(actual.out, ""); } -#[test] -fn short_stream_binary() { - let actual = nu!(r#" - nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101] - "#); +// #[test] +// fn short_stream_binary() { +// let actual = nu!(r#" +// nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101] +// "#); - assert_eq!(actual.out, "true"); -} +// assert_eq!(actual.out, "true"); +// } -#[test] -fn short_stream_mismatch() { - let actual = nu!(r#" - nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204] - "#); +// #[test] +// fn short_stream_mismatch() { +// let actual = nu!(r#" +// nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204] +// "#); - assert_eq!(actual.out, "false"); -} +// assert_eq!(actual.out, "false"); +// } -#[test] -fn short_stream_binary_overflow() { - let actual = nu!(r#" - nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101] - "#); +// #[test] +// fn short_stream_binary_overflow() { +// let actual = nu!(r#" +// nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101] +// "#); - assert_eq!(actual.out, "false"); -} +// assert_eq!(actual.out, "false"); +// } -#[test] -fn long_stream_binary() { - let actual = nu!(r#" - nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101] - "#); +// #[test] +// fn long_stream_binary() { +// let actual = nu!(r#" +// nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101] +// "#); - assert_eq!(actual.out, "true"); -} +// assert_eq!(actual.out, "true"); +// } -#[test] -fn long_stream_binary_overflow() { - // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow - let actual = nu!(r#" - nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect) - "#); +// #[test] +// fn long_stream_binary_overflow() { +// // .. 
ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow +// let actual = nu!(r#" +// nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect) +// "#); - assert_eq!(actual.out, "false"); -} +// assert_eq!(actual.out, "false"); +// } -#[test] -fn long_stream_binary_exact() { - // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow - let actual = nu!(r#" - nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) - "#); +// #[test] +// fn long_stream_binary_exact() { +// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow +// let actual = nu!(r#" +// nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) +// "#); - assert_eq!(actual.out, "true"); -} +// assert_eq!(actual.out, "true"); +// } -#[test] -fn long_stream_string_exact() { - // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow - let actual = nu!(r#" - nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) - "#); +// #[test] +// fn long_stream_string_exact() { +// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow +// let actual = nu!(r#" +// nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) +// "#); - assert_eq!(actual.out, "true"); -} +// assert_eq!(actual.out, "true"); +// } -#[test] -fn long_stream_mixed_exact() { - // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow - let actual = nu!(r#" - let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) - let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) +// #[test] +// fn long_stream_mixed_exact() { +// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow +// let actual = nu!(r#" +// let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) +// let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) - nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg) - "#); +// nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg) +// "#); - assert_eq!( - actual.err, "", - "invocation failed. command line limit likely reached" - ); - assert_eq!(actual.out, "true"); -} +// assert_eq!( +// actual.err, "", +// "invocation failed. 
command line limit likely reached" +// ); +// assert_eq!(actual.out, "true"); +// } -#[test] -fn long_stream_mixed_overflow() { - // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow - let actual = nu!(r#" - let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) - let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) +// #[test] +// fn long_stream_mixed_overflow() { +// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow +// let actual = nu!(r#" +// let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) +// let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) - nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01]) - "#); +// nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01]) +// "#); - assert_eq!( - actual.err, "", - "invocation failed. command line limit likely reached" - ); - assert_eq!(actual.out, "false"); -} +// assert_eq!( +// actual.err, "", +// "invocation failed. command line limit likely reached" +// ); +// assert_eq!(actual.out, "false"); +// } diff --git a/crates/nu-cmd-lang/src/core_commands/collect.rs b/crates/nu-cmd-lang/src/core_commands/collect.rs index eae41e8690..404aa568da 100644 --- a/crates/nu-cmd-lang/src/core_commands/collect.rs +++ b/crates/nu-cmd-lang/src/core_commands/collect.rs @@ -43,7 +43,7 @@ impl Command for Collect { stack.captures_to_stack_preserve_out_dest(closure.captures.clone()); let metadata = input.metadata(); - let input = input.into_value(call.head); + let input = input.into_value(call.head)?; let mut saved_positional = None; if let Some(var) = block.signature.get_positional(0) { diff --git a/crates/nu-cmd-lang/src/core_commands/describe.rs b/crates/nu-cmd-lang/src/core_commands/describe.rs index e1934f8bad..7d6d7f6f83 100644 --- a/crates/nu-cmd-lang/src/core_commands/describe.rs +++ b/crates/nu-cmd-lang/src/core_commands/describe.rs @@ -1,5 +1,5 @@ use nu_engine::command_prelude::*; -use nu_protocol::{engine::StateWorkingSet, PipelineMetadata}; +use nu_protocol::{engine::StateWorkingSet, ByteStreamSource, PipelineMetadata}; #[derive(Clone)] pub struct Describe; @@ -162,73 +162,38 @@ fn run( let metadata = input.metadata(); let description = match input { - PipelineData::ExternalStream { - ref stdout, - ref stderr, - ref exit_code, - .. - } => { - if options.detailed { - let stdout = if stdout.is_some() { - Value::record( - record! { - "type" => Value::string("stream", head), - "origin" => Value::string("external", head), - "subtype" => Value::string("any", head), - }, - head, - ) - } else { - Value::nothing(head) - }; - - let stderr = if stderr.is_some() { - Value::record( - record! { - "type" => Value::string("stream", head), - "origin" => Value::string("external", head), - "subtype" => Value::string("any", head), - }, - head, - ) - } else { - Value::nothing(head) - }; - - let exit_code = if exit_code.is_some() { - Value::record( - record! { - "type" => Value::string("stream", head), - "origin" => Value::string("external", head), - "subtype" => Value::string("int", head), - }, - head, - ) - } else { - Value::nothing(head) + PipelineData::ByteStream(stream, ..) => { + let description = if options.detailed { + let origin = match stream.source() { + ByteStreamSource::Read(_) => "unknown", + ByteStreamSource::File(_) => "file", + ByteStreamSource::Child(_) => "external", }; Value::record( record! 
{ - "type" => Value::string("stream", head), - "origin" => Value::string("external", head), - "stdout" => stdout, - "stderr" => stderr, - "exit_code" => exit_code, + "type" => Value::string("byte stream", head), + "origin" => Value::string(origin, head), "metadata" => metadata_to_value(metadata, head), }, head, ) } else { - Value::string("raw input", head) + Value::string("byte stream", head) + }; + + if !options.no_collect { + stream.drain()?; } + + description } - PipelineData::ListStream(_, _) => { + PipelineData::ListStream(stream, ..) => { if options.detailed { let subtype = if options.no_collect { Value::string("any", head) } else { - describe_value(input.into_value(head), head, engine_state) + describe_value(stream.into_value(), head, engine_state) }; Value::record( record! { @@ -242,19 +207,19 @@ fn run( } else if options.no_collect { Value::string("stream", head) } else { - let value = input.into_value(head); + let value = stream.into_value(); let base_description = value.get_type().to_string(); Value::string(format!("{} (stream)", base_description), head) } } - _ => { - let value = input.into_value(head); + PipelineData::Value(value, ..) => { if !options.detailed { Value::string(value.get_type().to_string(), head) } else { describe_value(value, head, engine_state) } } + PipelineData::Empty => Value::string(Type::Nothing.to_string(), head), }; Ok(description.into_pipeline_data()) diff --git a/crates/nu-cmd-lang/src/core_commands/do_.rs b/crates/nu-cmd-lang/src/core_commands/do_.rs index b057880cf3..5f14e88c07 100644 --- a/crates/nu-cmd-lang/src/core_commands/do_.rs +++ b/crates/nu-cmd-lang/src/core_commands/do_.rs @@ -1,6 +1,13 @@ use nu_engine::{command_prelude::*, get_eval_block_with_early_return, redirect_env}; -use nu_protocol::{engine::Closure, ListStream, OutDest, RawStream}; -use std::thread; +use nu_protocol::{ + engine::Closure, + process::{ChildPipe, ChildProcess, ExitStatus}, + ByteStream, ByteStreamSource, OutDest, +}; +use std::{ + io::{Cursor, Read}, + thread, +}; #[derive(Clone)] pub struct Do; @@ -86,115 +93,91 @@ impl Command for Do { } match result { - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - }) if capture_errors => { - // Use a thread to receive stdout message. - // Or we may get a deadlock if child process sends out too much bytes to stderr. - // - // For example: in normal linux system, stderr pipe's limit is 65535 bytes. - // if child process sends out 65536 bytes, the process will be hanged because no consumer - // consumes the first 65535 bytes - // So we need a thread to receive stdout message, then the current thread can continue to consume - // stderr messages. - let stdout_handler = stdout - .map(|stdout_stream| { - thread::Builder::new() - .name("stderr redirector".to_string()) - .spawn(move || { - let ctrlc = stdout_stream.ctrlc.clone(); - let span = stdout_stream.span; - RawStream::new( - Box::new(std::iter::once( - stdout_stream.into_bytes().map(|s| s.item), - )), - ctrlc, - span, - None, - ) + Ok(PipelineData::ByteStream(stream, metadata)) if capture_errors => { + let span = stream.span(); + match stream.into_child() { + Ok(mut child) => { + // Use a thread to receive stdout message. + // Or we may get a deadlock if child process sends out too much bytes to stderr. + // + // For example: in normal linux system, stderr pipe's limit is 65535 bytes. 
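The comment in this hunk describes a classic pipe deadlock: if the parent reads stderr to completion while the child is still filling a full stdout pipe (typically 64 KiB on Linux), both sides block forever. A self-contained `std::process` sketch of the workaround used here, draining stdout on a helper thread while the current thread reads stderr; the `sh`/`seq` command line is only an illustration and the real `do` command works against nushell's `ChildProcess` type instead:

```rust
use std::io::Read;
use std::process::{Command, Stdio};
use std::thread;

fn main() -> std::io::Result<()> {
    // Illustrative child that writes well over a pipe buffer to both streams.
    let mut child = Command::new("sh")
        .args(["-c", "seq 1 20000; seq 1 20000 >&2"])
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;

    // Drain stdout on its own thread so the child never blocks on a full
    // stdout pipe while we are busy reading stderr below.
    let mut stdout = child.stdout.take().expect("stdout was piped");
    let stdout_thread = thread::spawn(move || -> std::io::Result<Vec<u8>> {
        let mut buf = Vec::new();
        stdout.read_to_end(&mut buf)?;
        Ok(buf)
    });

    // Meanwhile, collect stderr on the current thread.
    let mut stderr_buf = String::new();
    child
        .stderr
        .take()
        .expect("stderr was piped")
        .read_to_string(&mut stderr_buf)?;

    let stdout_buf = stdout_thread.join().expect("stdout thread panicked")?;
    let status = child.wait()?;
    println!(
        "exit: {status}, stdout: {} bytes, stderr: {} bytes",
        stdout_buf.len(),
        stderr_buf.len()
    );
    Ok(())
}
```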
+ // if child process sends out 65536 bytes, the process will be hanged because no consumer + // consumes the first 65535 bytes + // So we need a thread to receive stdout message, then the current thread can continue to consume + // stderr messages. + let stdout_handler = child + .stdout + .take() + .map(|mut stdout| { + thread::Builder::new() + .name("stdout consumer".to_string()) + .spawn(move || { + let mut buf = Vec::new(); + stdout.read_to_end(&mut buf)?; + Ok::<_, ShellError>(buf) + }) + .err_span(head) }) - .err_span(head) - }) - .transpose()?; + .transpose()?; - // Intercept stderr so we can return it in the error if the exit code is non-zero. - // The threading issues mentioned above dictate why we also need to intercept stdout. - let mut stderr_ctrlc = None; - let stderr_msg = match stderr { - None => "".to_string(), - Some(stderr_stream) => { - stderr_ctrlc.clone_from(&stderr_stream.ctrlc); - stderr_stream.into_string().map(|s| s.item)? - } - }; + // Intercept stderr so we can return it in the error if the exit code is non-zero. + // The threading issues mentioned above dictate why we also need to intercept stdout. + let stderr_msg = match child.stderr.take() { + None => String::new(), + Some(mut stderr) => { + let mut buf = String::new(); + stderr.read_to_string(&mut buf).err_span(span)?; + buf + } + }; - let stdout = if let Some(handle) = stdout_handler { - match handle.join() { - Err(err) => { + let stdout = if let Some(handle) = stdout_handler { + match handle.join() { + Err(err) => { + return Err(ShellError::ExternalCommand { + label: "Fail to receive external commands stdout message" + .to_string(), + help: format!("{err:?}"), + span, + }); + } + Ok(res) => Some(res?), + } + } else { + None + }; + + if child.wait()? != ExitStatus::Exited(0) { return Err(ShellError::ExternalCommand { - label: "Fail to receive external commands stdout message" - .to_string(), - help: format!("{err:?}"), + label: "External command failed".to_string(), + help: stderr_msg, span, }); } - Ok(res) => Some(res), - } - } else { - None - }; - let exit_code: Vec = match exit_code { - None => vec![], - Some(exit_code_stream) => exit_code_stream.into_iter().collect(), - }; - if let Some(Value::Int { val: code, .. 
}) = exit_code.last() { - if *code != 0 { - return Err(ShellError::ExternalCommand { - label: "External command failed".to_string(), - help: stderr_msg, - span, - }); + let mut child = ChildProcess::from_raw(None, None, None, span); + if let Some(stdout) = stdout { + child.stdout = Some(ChildPipe::Tee(Box::new(Cursor::new(stdout)))); + } + if !stderr_msg.is_empty() { + child.stderr = Some(ChildPipe::Tee(Box::new(Cursor::new(stderr_msg)))); + } + Ok(PipelineData::ByteStream( + ByteStream::child(child, span), + metadata, + )) } + Err(stream) => Ok(PipelineData::ByteStream(stream, metadata)), } - - Ok(PipelineData::ExternalStream { - stdout, - stderr: Some(RawStream::new( - Box::new(std::iter::once(Ok(stderr_msg.into_bytes()))), - stderr_ctrlc, - span, - None, - )), - exit_code: Some(ListStream::new(exit_code.into_iter(), span, None)), - span, - metadata, - trim_end_newline, - }) } - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code: _, - span, - metadata, - trim_end_newline, - }) if ignore_program_errors - && !matches!(caller_stack.stdout(), OutDest::Pipe | OutDest::Capture) => + Ok(PipelineData::ByteStream(mut stream, metadata)) + if ignore_program_errors + && !matches!(caller_stack.stdout(), OutDest::Pipe | OutDest::Capture) => { - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code: None, - span, - metadata, - trim_end_newline, - }) + if let ByteStreamSource::Child(child) = stream.source_mut() { + child.set_exit_code(0) + } + Ok(PipelineData::ByteStream(stream, metadata)) } Ok(PipelineData::Value(Value::Error { .. }, ..)) | Err(_) if ignore_shell_errors => { Ok(PipelineData::empty()) diff --git a/crates/nu-cmd-lang/src/core_commands/for_.rs b/crates/nu-cmd-lang/src/core_commands/for_.rs index 64e6c0a6ba..6f9391614e 100644 --- a/crates/nu-cmd-lang/src/core_commands/for_.rs +++ b/crates/nu-cmd-lang/src/core_commands/for_.rs @@ -121,12 +121,14 @@ impl Command for For { Err(err) => { return Err(err); } - Ok(pipeline) => { - let exit_code = pipeline.drain_with_exit_code()?; - if exit_code != 0 { - return Ok(PipelineData::new_external_stream_with_only_exit_code( - exit_code, - )); + Ok(data) => { + if let Some(status) = data.drain()? { + let code = status.code(); + if code != 0 { + return Ok( + PipelineData::new_external_stream_with_only_exit_code(code), + ); + } } } } @@ -159,12 +161,14 @@ impl Command for For { Err(err) => { return Err(err); } - Ok(pipeline) => { - let exit_code = pipeline.drain_with_exit_code()?; - if exit_code != 0 { - return Ok(PipelineData::new_external_stream_with_only_exit_code( - exit_code, - )); + Ok(data) => { + if let Some(status) = data.drain()? 
{ + let code = status.code(); + if code != 0 { + return Ok( + PipelineData::new_external_stream_with_only_exit_code(code), + ); + } } } } @@ -173,7 +177,7 @@ impl Command for For { x => { stack.add_var(var_id, x); - eval_block(&engine_state, stack, block, PipelineData::empty())?.into_value(head); + eval_block(&engine_state, stack, block, PipelineData::empty())?.into_value(head)?; } } Ok(PipelineData::empty()) diff --git a/crates/nu-cmd-lang/src/core_commands/let_.rs b/crates/nu-cmd-lang/src/core_commands/let_.rs index c780954bc6..cc5504d8d6 100644 --- a/crates/nu-cmd-lang/src/core_commands/let_.rs +++ b/crates/nu-cmd-lang/src/core_commands/let_.rs @@ -61,7 +61,7 @@ impl Command for Let { let eval_block = get_eval_block(engine_state); let stack = &mut stack.start_capture(); let pipeline_data = eval_block(engine_state, stack, block, input)?; - let value = pipeline_data.into_value(call.head); + let value = pipeline_data.into_value(call.head)?; // if given variable type is Glob, and our result is string // then nushell need to convert from Value::String to Value::Glob diff --git a/crates/nu-cmd-lang/src/core_commands/loop_.rs b/crates/nu-cmd-lang/src/core_commands/loop_.rs index 29f22649eb..9b1e36a057 100644 --- a/crates/nu-cmd-lang/src/core_commands/loop_.rs +++ b/crates/nu-cmd-lang/src/core_commands/loop_.rs @@ -53,12 +53,12 @@ impl Command for Loop { Err(err) => { return Err(err); } - Ok(pipeline) => { - let exit_code = pipeline.drain_with_exit_code()?; - if exit_code != 0 { - return Ok(PipelineData::new_external_stream_with_only_exit_code( - exit_code, - )); + Ok(data) => { + if let Some(status) = data.drain()? { + let code = status.code(); + if code != 0 { + return Ok(PipelineData::new_external_stream_with_only_exit_code(code)); + } } } } diff --git a/crates/nu-cmd-lang/src/core_commands/mut_.rs b/crates/nu-cmd-lang/src/core_commands/mut_.rs index be2d66aff4..60c4c146db 100644 --- a/crates/nu-cmd-lang/src/core_commands/mut_.rs +++ b/crates/nu-cmd-lang/src/core_commands/mut_.rs @@ -61,7 +61,7 @@ impl Command for Mut { let eval_block = get_eval_block(engine_state); let stack = &mut stack.start_capture(); let pipeline_data = eval_block(engine_state, stack, block, input)?; - let value = pipeline_data.into_value(call.head); + let value = pipeline_data.into_value(call.head)?; // if given variable type is Glob, and our result is string // then nushell need to convert from Value::String to Value::Glob diff --git a/crates/nu-cmd-lang/src/core_commands/try_.rs b/crates/nu-cmd-lang/src/core_commands/try_.rs index bc96f3c28a..0b399e368a 100644 --- a/crates/nu-cmd-lang/src/core_commands/try_.rs +++ b/crates/nu-cmd-lang/src/core_commands/try_.rs @@ -62,10 +62,11 @@ impl Command for Try { } // external command may fail to run Ok(pipeline) => { - let (pipeline, external_failed) = pipeline.check_external_failed(); + let (pipeline, external_failed) = pipeline.check_external_failed()?; if external_failed { - let exit_code = pipeline.drain_with_exit_code()?; - stack.add_env_var("LAST_EXIT_CODE".into(), Value::int(exit_code, call.head)); + let status = pipeline.drain()?; + let code = status.map(|status| status.code()).unwrap_or(0); + stack.add_env_var("LAST_EXIT_CODE".into(), Value::int(code.into(), call.head)); let err_value = Value::nothing(call.head); handle_catch(err_value, catch_block, engine_state, stack, eval_block) } else { diff --git a/crates/nu-cmd-lang/src/core_commands/while_.rs b/crates/nu-cmd-lang/src/core_commands/while_.rs index e42e4ab6d1..bf9076aa0c 100644 --- 
a/crates/nu-cmd-lang/src/core_commands/while_.rs +++ b/crates/nu-cmd-lang/src/core_commands/while_.rs @@ -70,14 +70,16 @@ impl Command for While { Err(err) => { return Err(err); } - Ok(pipeline) => { - let exit_code = pipeline.drain_with_exit_code()?; - if exit_code != 0 { - return Ok( - PipelineData::new_external_stream_with_only_exit_code( - exit_code, - ), - ); + Ok(data) => { + if let Some(status) = data.drain()? { + let code = status.code(); + if code != 0 { + return Ok( + PipelineData::new_external_stream_with_only_exit_code( + code, + ), + ); + } } } } diff --git a/crates/nu-cmd-lang/src/example_support.rs b/crates/nu-cmd-lang/src/example_support.rs index 860572f349..bb03bbaf8c 100644 --- a/crates/nu-cmd-lang/src/example_support.rs +++ b/crates/nu-cmd-lang/src/example_support.rs @@ -122,10 +122,9 @@ pub fn eval_block( stack.add_env_var("PWD".to_string(), Value::test_string(cwd.to_string_lossy())); - match nu_engine::eval_block::(engine_state, &mut stack, &block, input) { - Err(err) => panic!("test eval error in `{}`: {:?}", "TODO", err), - Ok(result) => result.into_value(Span::test_data()), - } + nu_engine::eval_block::(engine_state, &mut stack, &block, input) + .and_then(|data| data.into_value(Span::test_data())) + .unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", "TODO", err)) } pub fn check_example_evaluates_to_expected_output( diff --git a/crates/nu-color-config/src/style_computer.rs b/crates/nu-color-config/src/style_computer.rs index 2293439183..91907c1428 100644 --- a/crates/nu-color-config/src/style_computer.rs +++ b/crates/nu-color-config/src/style_computer.rs @@ -58,11 +58,11 @@ impl<'a> StyleComputer<'a> { Some(ComputableStyle::Closure(closure, span)) => { let result = ClosureEvalOnce::new(self.engine_state, self.stack, closure.clone()) .debug(false) - .run_with_value(value.clone()); + .run_with_value(value.clone()) + .and_then(|data| data.into_value(*span)); match result { - Ok(v) => { - let value = v.into_value(*span); + Ok(value) => { // These should be the same color data forms supported by color_config. match value { Value::Record { .. } => color_record_to_nustyle(&value), diff --git a/crates/nu-command/src/bytes/starts_with.rs b/crates/nu-command/src/bytes/starts_with.rs index 69187894b4..2d7ca3e26a 100644 --- a/crates/nu-command/src/bytes/starts_with.rs +++ b/crates/nu-command/src/bytes/starts_with.rs @@ -60,63 +60,13 @@ impl Command for BytesStartsWith { pattern, cell_paths, }; - - match input { - PipelineData::ExternalStream { - stdout: Some(stream), - span, - .. - } => { - let mut i = 0; - - for item in stream { - let byte_slice = match &item { - // String and binary data are valid byte patterns - Ok(Value::String { val, .. }) => val.as_bytes(), - Ok(Value::Binary { val, .. }) => val, - // If any Error value is output, echo it back - Ok(v @ Value::Error { .. 
}) => return Ok(v.clone().into_pipeline_data()), - // Unsupported data - Ok(other) => { - return Ok(Value::error( - ShellError::OnlySupportsThisInputType { - exp_input_type: "string and binary".into(), - wrong_type: other.get_type().to_string(), - dst_span: span, - src_span: other.span(), - }, - span, - ) - .into_pipeline_data()); - } - Err(err) => return Err(err.to_owned()), - }; - - let max = byte_slice.len().min(arg.pattern.len() - i); - - if byte_slice[..max] == arg.pattern[i..i + max] { - i += max; - - if i >= arg.pattern.len() { - return Ok(Value::bool(true, span).into_pipeline_data()); - } - } else { - return Ok(Value::bool(false, span).into_pipeline_data()); - } - } - - // We reached the end of the stream and never returned, - // the pattern wasn't exhausted so it probably doesn't match - Ok(Value::bool(false, span).into_pipeline_data()) - } - _ => operate( - starts_with, - arg, - input, - call.head, - engine_state.ctrlc.clone(), - ), - } + operate( + starts_with, + arg, + input, + call.head, + engine_state.ctrlc.clone(), + ) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/charting/histogram.rs b/crates/nu-command/src/charting/histogram.rs index 35a9d82a3d..52964b087d 100755 --- a/crates/nu-command/src/charting/histogram.rs +++ b/crates/nu-command/src/charting/histogram.rs @@ -121,7 +121,7 @@ impl Command for Histogram { }; let span = call.head; - let data_as_value = input.into_value(span); + let data_as_value = input.into_value(span)?; let value_span = data_as_value.span(); // `input` is not a list, here we can return an error. run_histogram( diff --git a/crates/nu-command/src/conversions/into/binary.rs b/crates/nu-command/src/conversions/into/binary.rs index 6fb997a590..479b0fc7d7 100644 --- a/crates/nu-command/src/conversions/into/binary.rs +++ b/crates/nu-command/src/conversions/into/binary.rs @@ -127,25 +127,15 @@ fn into_binary( let cell_paths = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - match input { - PipelineData::ExternalStream { stdout: None, .. } => { - Ok(Value::binary(vec![], head).into_pipeline_data()) - } - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - // TODO: in the future, we may want this to stream out, converting each to bytes - let output = stream.into_bytes()?; - Ok(Value::binary(output.item, head).into_pipeline_data()) - } - _ => { - let args = Arguments { - cell_paths, - compact: call.has_flag(engine_state, stack, "compact")?, - }; - operate(action, args, input, call.head, engine_state.ctrlc.clone()) - } + if let PipelineData::ByteStream(stream, ..) = input { + // TODO: in the future, we may want this to stream out, converting each to bytes + Ok(Value::binary(stream.into_bytes()?, head).into_pipeline_data()) + } else { + let args = Arguments { + cell_paths, + compact: call.has_flag(engine_state, stack, "compact")?, + }; + operate(action, args, input, call.head, engine_state.ctrlc.clone()) } } diff --git a/crates/nu-command/src/conversions/into/cell_path.rs b/crates/nu-command/src/conversions/into/cell_path.rs index 4faa6e83d6..6da317abd3 100644 --- a/crates/nu-command/src/conversions/into/cell_path.rs +++ b/crates/nu-command/src/conversions/into/cell_path.rs @@ -101,11 +101,11 @@ fn into_cell_path(call: &Call, input: PipelineData) -> Result = stream.into_iter().collect(); Ok(list_to_cell_path(&list, head)?.into_pipeline_data()) } - PipelineData::ExternalStream { span, .. } => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, int".into(), - wrong_type: "raw data".into(), + wrong_type: "byte stream".into(), dst_span: head, - src_span: span, + src_span: stream.span(), }), PipelineData::Empty => Err(ShellError::PipelineEmpty { dst_span: head }), } diff --git a/crates/nu-command/src/conversions/into/glob.rs b/crates/nu-command/src/conversions/into/glob.rs index 8c167b0dc0..e5d03093f4 100644 --- a/crates/nu-command/src/conversions/into/glob.rs +++ b/crates/nu-command/src/conversions/into/glob.rs @@ -82,20 +82,12 @@ fn glob_helper( let head = call.head; let cell_paths = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let args = Arguments { cell_paths }; - match input { - PipelineData::ExternalStream { stdout: None, .. } => { - Ok(Value::glob(String::new(), false, head).into_pipeline_data()) - } - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - // TODO: in the future, we may want this to stream out, converting each to bytes - let output = stream.into_string()?; - Ok(Value::glob(output.item, false, head).into_pipeline_data()) - } - _ => operate(action, args, input, head, engine_state.ctrlc.clone()), + if let PipelineData::ByteStream(stream, ..) = input { + // TODO: in the future, we may want this to stream out, converting each to bytes + Ok(Value::glob(stream.into_string()?, false, head).into_pipeline_data()) + } else { + let args = Arguments { cell_paths }; + operate(action, args, input, head, engine_state.ctrlc.clone()) } } diff --git a/crates/nu-command/src/conversions/into/record.rs b/crates/nu-command/src/conversions/into/record.rs index c9342e8e39..e867f06e15 100644 --- a/crates/nu-command/src/conversions/into/record.rs +++ b/crates/nu-command/src/conversions/into/record.rs @@ -108,7 +108,7 @@ fn into_record( call: &Call, input: PipelineData, ) -> Result { - let input = input.into_value(call.head); + let input = input.into_value(call.head)?; let input_type = input.get_type(); let span = input.span(); let res = match input { diff --git a/crates/nu-command/src/conversions/into/string.rs b/crates/nu-command/src/conversions/into/string.rs index bc791a37b2..eda4f7e5a5 100644 --- a/crates/nu-command/src/conversions/into/string.rs +++ b/crates/nu-command/src/conversions/into/string.rs @@ -155,26 +155,18 @@ fn string_helper( } let cell_paths = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let config = engine_state.get_config().clone(); - let args = Arguments { - decimals_value, - cell_paths, - config, - }; - match input { - PipelineData::ExternalStream { stdout: None, .. } => { - Ok(Value::string(String::new(), head).into_pipeline_data()) - } - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - // TODO: in the future, we may want this to stream out, converting each to bytes - let output = stream.into_string()?; - Ok(Value::string(output.item, head).into_pipeline_data()) - } - _ => operate(action, args, input, head, engine_state.ctrlc.clone()), + if let PipelineData::ByteStream(stream, ..) 
= input { + // TODO: in the future, we may want this to stream out, converting each to bytes + Ok(Value::string(stream.into_string()?, head).into_pipeline_data()) + } else { + let config = engine_state.get_config().clone(); + let args = Arguments { + decimals_value, + cell_paths, + config, + }; + operate(action, args, input, head, engine_state.ctrlc.clone()) } } diff --git a/crates/nu-command/src/database/values/sqlite.rs b/crates/nu-command/src/database/values/sqlite.rs index 9778f44993..483da7672e 100644 --- a/crates/nu-command/src/database/values/sqlite.rs +++ b/crates/nu-command/src/database/values/sqlite.rs @@ -91,7 +91,7 @@ impl SQLiteDatabase { } pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(value) } diff --git a/crates/nu-command/src/debug/inspect.rs b/crates/nu-command/src/debug/inspect.rs index 681d2ef6c6..ad6163c7b6 100644 --- a/crates/nu-command/src/debug/inspect.rs +++ b/crates/nu-command/src/debug/inspect.rs @@ -29,7 +29,7 @@ impl Command for Inspect { input: PipelineData, ) -> Result { let input_metadata = input.metadata(); - let input_val = input.into_value(call.head); + let input_val = input.into_value(call.head)?; if input_val.is_nothing() { return Err(ShellError::PipelineEmpty { dst_span: call.head, diff --git a/crates/nu-command/src/debug/timeit.rs b/crates/nu-command/src/debug/timeit.rs index 92f8fe18cd..a445679b81 100644 --- a/crates/nu-command/src/debug/timeit.rs +++ b/crates/nu-command/src/debug/timeit.rs @@ -53,13 +53,12 @@ impl Command for TimeIt { eval_block(engine_state, stack, block, input)? } else { let eval_expression_with_input = get_eval_expression_with_input(engine_state); - eval_expression_with_input(engine_state, stack, command_to_run, input) - .map(|res| res.0)? 
+ eval_expression_with_input(engine_state, stack, command_to_run, input)?.0 } } else { PipelineData::empty() } - .into_value(call.head); + .into_value(call.head)?; let end_time = Instant::now(); diff --git a/crates/nu-command/src/filesystem/open.rs b/crates/nu-command/src/filesystem/open.rs index 23664bb576..5fb8527511 100644 --- a/crates/nu-command/src/filesystem/open.rs +++ b/crates/nu-command/src/filesystem/open.rs @@ -1,8 +1,8 @@ use super::util::get_rest_for_glob_pattern; #[allow(deprecated)] use nu_engine::{command_prelude::*, current_dir, get_eval_block}; -use nu_protocol::{BufferedReader, DataSource, NuGlob, PipelineMetadata, RawStream}; -use std::{io::BufReader, path::Path}; +use nu_protocol::{ByteStream, DataSource, NuGlob, PipelineMetadata}; +use std::path::Path; #[cfg(feature = "sqlite")] use crate::database::SQLiteDatabase; @@ -143,23 +143,13 @@ impl Command for Open { } }; - let buf_reader = BufReader::new(file); - - let file_contents = PipelineData::ExternalStream { - stdout: Some(RawStream::new( - Box::new(BufferedReader::new(buf_reader)), - ctrlc.clone(), - call_span, - None, - )), - stderr: None, - exit_code: None, - span: call_span, - metadata: Some(PipelineMetadata { + let stream = PipelineData::ByteStream( + ByteStream::file(file, call_span, ctrlc.clone()), + Some(PipelineMetadata { data_source: DataSource::FilePath(path.to_path_buf()), }), - trim_end_newline: false, - }; + ); + let exts_opt: Option> = if raw { None } else { @@ -184,9 +174,9 @@ impl Command for Open { let decl = engine_state.get_decl(converter_id); let command_output = if let Some(block_id) = decl.get_block_id() { let block = engine_state.get_block(block_id); - eval_block(engine_state, stack, block, file_contents) + eval_block(engine_state, stack, block, stream) } else { - decl.run(engine_state, stack, &Call::new(call_span), file_contents) + decl.run(engine_state, stack, &Call::new(call_span), stream) }; output.push(command_output.map_err(|inner| { ShellError::GenericError{ @@ -198,7 +188,7 @@ impl Command for Open { } })?); } - None => output.push(file_contents), + None => output.push(stream), } } } diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index 0826284798..ca9943eafb 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -5,12 +5,15 @@ use nu_engine::{command_prelude::*, current_dir}; use nu_path::expand_path_with; use nu_protocol::{ ast::{Expr, Expression}, - DataSource, OutDest, PipelineMetadata, RawStream, + byte_stream::copy_with_interrupt, + process::ChildPipe, + ByteStreamSource, DataSource, OutDest, PipelineMetadata, }; use std::{ fs::File, - io::Write, + io::{self, BufRead, BufReader, Read, Write}, path::{Path, PathBuf}, + sync::{atomic::AtomicBool, Arc}, thread, }; @@ -104,12 +107,7 @@ impl Command for Save { }); match input { - PipelineData::ExternalStream { - stdout, - stderr, - metadata, - .. - } => { + PipelineData::ByteStream(stream, metadata) => { check_saving_to_source_file(metadata.as_ref(), &path, stderr_path.as_ref())?; let (file, stderr_file) = get_files( @@ -121,40 +119,97 @@ impl Command for Save { force, )?; - match (stdout, stderr) { - (Some(stdout), stderr) => { - // delegate a thread to redirect stderr to result. 
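The rewritten `save` arm that follows dispatches on the new `ByteStreamSource` enum instead of the old `stdout`/`stderr` fields. A compressed sketch of that shape, using only the `nu_protocol` items that appear in this patch (`ByteStream`, `ByteStreamSource`, `ChildPipe`); `copy_to_sink` is an illustrative name, and the real command additionally handles stderr, cancellation, progress, and the child's exit status:

```rust
use std::io::{self, Write};

use nu_protocol::{process::ChildPipe, ByteStream, ByteStreamSource};

// Illustrative helper: exhaust whichever source backs a ByteStream.
// Plain readers and files are copied directly; a child process exposes
// optional stdout/stderr pipes, each of which implements `Read`
// (either a live pipe or an in-memory tee).
fn copy_to_sink(stream: ByteStream, sink: &mut impl Write) -> io::Result<u64> {
    match stream.into_source() {
        ByteStreamSource::Read(mut read) => io::copy(&mut read, sink),
        ByteStreamSource::File(mut file) => io::copy(&mut file, sink),
        ByteStreamSource::Child(mut child) => {
            let copied = match child.stdout.take() {
                Some(ChildPipe::Pipe(mut pipe)) => io::copy(&mut pipe, sink)?,
                Some(ChildPipe::Tee(mut tee)) => io::copy(&mut tee, sink)?,
                None => 0,
            };
            // The real `save` also drains or redirects stderr and waits on
            // the child before returning.
            Ok(copied)
        }
    }
}
```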
- let handler = stderr - .map(|stderr| match stderr_file { - Some(stderr_file) => thread::Builder::new() - .name("stderr redirector".to_string()) - .spawn(move || { - stream_to_file(stderr, stderr_file, span, progress) - }), - None => thread::Builder::new() - .name("stderr redirector".to_string()) - .spawn(move || stderr.drain()), - }) - .transpose() - .err_span(span)?; + let size = stream.known_size(); + let ctrlc = engine_state.ctrlc.clone(); - let res = stream_to_file(stdout, file, span, progress); - if let Some(h) = handler { - h.join().map_err(|err| ShellError::ExternalCommand { - label: "Fail to receive external commands stderr message" - .to_string(), - help: format!("{err:?}"), - span, - })??; - } - res?; + match stream.into_source() { + ByteStreamSource::Read(read) => { + stream_to_file(read, size, ctrlc, file, span, progress)?; } - (None, Some(stderr)) => match stderr_file { - Some(stderr_file) => stream_to_file(stderr, stderr_file, span, progress)?, - None => stderr.drain()?, - }, - (None, None) => {} - }; + ByteStreamSource::File(source) => { + stream_to_file(source, size, ctrlc, file, span, progress)?; + } + ByteStreamSource::Child(mut child) => { + fn write_or_consume_stderr( + stderr: ChildPipe, + file: Option, + span: Span, + ctrlc: Option>, + progress: bool, + ) -> Result<(), ShellError> { + if let Some(file) = file { + match stderr { + ChildPipe::Pipe(pipe) => { + stream_to_file(pipe, None, ctrlc, file, span, progress) + } + ChildPipe::Tee(tee) => { + stream_to_file(tee, None, ctrlc, file, span, progress) + } + }? + } else { + match stderr { + ChildPipe::Pipe(mut pipe) => { + io::copy(&mut pipe, &mut io::sink()) + } + ChildPipe::Tee(mut tee) => io::copy(&mut tee, &mut io::sink()), + } + .err_span(span)?; + } + Ok(()) + } + + match (child.stdout.take(), child.stderr.take()) { + (Some(stdout), stderr) => { + // delegate a thread to redirect stderr to result. + let handler = stderr + .map(|stderr| { + let ctrlc = ctrlc.clone(); + thread::Builder::new().name("stderr saver".into()).spawn( + move || { + write_or_consume_stderr( + stderr, + stderr_file, + span, + ctrlc, + progress, + ) + }, + ) + }) + .transpose() + .err_span(span)?; + + let res = match stdout { + ChildPipe::Pipe(pipe) => { + stream_to_file(pipe, None, ctrlc, file, span, progress) + } + ChildPipe::Tee(tee) => { + stream_to_file(tee, None, ctrlc, file, span, progress) + } + }; + if let Some(h) = handler { + h.join().map_err(|err| ShellError::ExternalCommand { + label: "Fail to receive external commands stderr message" + .to_string(), + help: format!("{err:?}"), + span, + })??; + } + res?; + } + (None, Some(stderr)) => { + write_or_consume_stderr( + stderr, + stderr_file, + span, + ctrlc, + progress, + )?; + } + (None, None) => {} + }; + } + } Ok(PipelineData::Empty) } @@ -302,8 +357,7 @@ fn input_to_bytes( ) -> Result, ShellError> { let ext = if raw { None - // if is extern stream , in other words , not value - } else if let PipelineData::ExternalStream { .. } = input { + } else if let PipelineData::ByteStream(..) = input { None } else if let PipelineData::Value(Value::String { .. }, ..) = input { None @@ -318,7 +372,7 @@ fn input_to_bytes( input }; - value_to_bytes(input.into_value(span)) + value_to_bytes(input.into_value(span)?) 
} /// Convert given data into content of file of specified extension if @@ -448,84 +502,54 @@ fn get_files( } fn stream_to_file( - mut stream: RawStream, + mut source: impl Read, + known_size: Option, + ctrlc: Option>, mut file: File, span: Span, progress: bool, ) -> Result<(), ShellError> { - // https://github.com/nushell/nushell/pull/9377 contains the reason - // for not using BufWriter - let writer = &mut file; + // https://github.com/nushell/nushell/pull/9377 contains the reason for not using `BufWriter` + if progress { + let mut bytes_processed = 0; - let mut bytes_processed: u64 = 0; - let bytes_processed_p = &mut bytes_processed; - let file_total_size = stream.known_size; - let mut process_failed = false; - let process_failed_p = &mut process_failed; + let mut bar = progress_bar::NuProgressBar::new(known_size); - // Create the progress bar - // It looks a bit messy but I am doing it this way to avoid - // creating the bar when is not needed - let (mut bar_opt, bar_opt_clone) = if progress { - let tmp_bar = progress_bar::NuProgressBar::new(file_total_size); - let tmp_bar_clone = tmp_bar.clone(); + // TODO: reduce the number of progress bar updates? - (Some(tmp_bar), Some(tmp_bar_clone)) - } else { - (None, None) - }; + let mut reader = BufReader::new(source); - stream.try_for_each(move |result| { - let buf = match result { - Ok(v) => match v { - Value::String { val, .. } => val.into_bytes(), - Value::Binary { val, .. } => val, - // Propagate errors by explicitly matching them before the final case. - Value::Error { error, .. } => return Err(*error), - other => { - return Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "string or binary".into(), - wrong_type: other.get_type().to_string(), - dst_span: span, - src_span: other.span(), - }); + let res = loop { + if nu_utils::ctrl_c::was_pressed(&ctrlc) { + bar.abandoned_msg("# Cancelled #".to_owned()); + return Ok(()); + } + + match reader.fill_buf() { + Ok(&[]) => break Ok(()), + Ok(buf) => { + file.write_all(buf).err_span(span)?; + let len = buf.len(); + reader.consume(len); + bytes_processed += len as u64; + bar.update_bar(bytes_processed); } - }, - Err(err) => { - *process_failed_p = true; - return Err(err); + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => break Err(e), } }; - // If the `progress` flag is set then - if progress { - // Update the total amount of bytes that has been saved and then print the progress bar - *bytes_processed_p += buf.len() as u64; - if let Some(bar) = &mut bar_opt { - bar.update_bar(*bytes_processed_p); - } - } - - if let Err(err) = writer.write_all(&buf) { - *process_failed_p = true; - return Err(ShellError::IOError { - msg: err.to_string(), - }); - } - Ok(()) - })?; - - // If the `progress` flag is set then - if progress { // If the process failed, stop the progress bar with an error message. 
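The rewritten progress branch above drives the copy by hand with `BufRead::fill_buf`/`consume`, which lets the caller observe byte counts (and a cancellation flag) between chunks instead of handing everything to `io::copy`. A std-only sketch of that loop with an illustrative progress callback in place of the progress bar and ctrl-c check:

```rust
use std::io::{self, BufRead, BufReader, Read, Write};

// Copy `source` into `dest`, reporting the running byte count after each
// chunk. Retries on `ErrorKind::Interrupted`, mirroring the loop above.
fn copy_with_progress(
    source: impl Read,
    dest: &mut impl Write,
    mut on_progress: impl FnMut(u64),
) -> io::Result<u64> {
    let mut reader = BufReader::new(source);
    let mut total = 0u64;
    loop {
        let buf = match reader.fill_buf() {
            Ok(&[]) => break, // EOF
            Ok(buf) => buf,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        dest.write_all(buf)?;
        let len = buf.len();
        reader.consume(len);
        total += len as u64;
        on_progress(total);
    }
    dest.flush()?;
    Ok(total)
}

fn main() -> io::Result<()> {
    let data = b"hello world".repeat(1000);
    let mut out = Vec::new();
    let copied = copy_with_progress(&data[..], &mut out, |n| eprintln!("{n} bytes"))?;
    assert_eq!(copied as usize, out.len());
    Ok(())
}
```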
- if process_failed { - if let Some(bar) = bar_opt_clone { - bar.abandoned_msg("# Error while saving #".to_owned()); - } + if let Err(err) = res { + let _ = file.flush(); + bar.abandoned_msg("# Error while saving #".to_owned()); + Err(err.into_spanned(span).into()) + } else { + file.flush().err_span(span)?; + Ok(()) } + } else { + copy_with_interrupt(&mut source, &mut file, span, ctrlc.as_deref())?; + Ok(()) } - - file.flush()?; - - Ok(()) } diff --git a/crates/nu-command/src/filters/columns.rs b/crates/nu-command/src/filters/columns.rs index b6e15af8df..da8bc5ae57 100644 --- a/crates/nu-command/src/filters/columns.rs +++ b/crates/nu-command/src/filters/columns.rs @@ -125,13 +125,11 @@ fn getcol(head: Span, input: PipelineData) -> Result { .into_pipeline_data() .set_metadata(metadata)) } - PipelineData::ExternalStream { .. } => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "record or table".into(), - wrong_type: "raw data".into(), + wrong_type: "byte stream".into(), dst_span: head, - src_span: input - .span() - .expect("PipelineData::ExternalStream had no span"), + src_span: stream.span(), }), } } diff --git a/crates/nu-command/src/filters/drop/column.rs b/crates/nu-command/src/filters/drop/column.rs index 3354492570..01c13deee4 100644 --- a/crates/nu-command/src/filters/drop/column.rs +++ b/crates/nu-command/src/filters/drop/column.rs @@ -133,11 +133,11 @@ fn drop_cols( } } PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::ExternalStream { span, .. } => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "table or record".into(), - wrong_type: "raw data".into(), + wrong_type: "byte stream".into(), dst_span: head, - src_span: span, + src_span: stream.span(), }), } } diff --git a/crates/nu-command/src/filters/each.rs b/crates/nu-command/src/filters/each.rs index 65d61fd3a8..a074f63abb 100644 --- a/crates/nu-command/src/filters/each.rs +++ b/crates/nu-command/src/filters/each.rs @@ -129,7 +129,9 @@ with 'transpose' first."# } Some(Value::list(vals, span)) } - Ok(data) => Some(data.into_value(head)), + Ok(data) => Some(data.into_value(head).unwrap_or_else(|err| { + Value::error(chain_error_with_input(err, is_error, span), span) + })), Err(ShellError::Continue { span }) => Some(Value::nothing(span)), Err(ShellError::Break { .. }) => None, Err(error) => { @@ -140,37 +142,39 @@ with 'transpose' first."# }) .into_pipeline_data(head, engine_state.ctrlc.clone())) } - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - let mut closure = ClosureEval::new(engine_state, stack, closure); - Ok(stream - .into_iter() - .map_while(move |value| { - let value = match value { - Ok(value) => value, - Err(ShellError::Continue { span }) => { - return Some(Value::nothing(span)) - } - Err(ShellError::Break { .. }) => return None, - Err(err) => return Some(Value::error(err, head)), - }; + PipelineData::ByteStream(stream, ..) => { + if let Some(chunks) = stream.chunks() { + let mut closure = ClosureEval::new(engine_state, stack, closure); + Ok(chunks + .map_while(move |value| { + let value = match value { + Ok(value) => value, + Err(ShellError::Continue { span }) => { + return Some(Value::nothing(span)) + } + Err(ShellError::Break { .. 
}) => return None, + Err(err) => return Some(Value::error(err, head)), + }; - let span = value.span(); - let is_error = value.is_error(); - match closure.run_with_value(value) { - Ok(data) => Some(data.into_value(head)), - Err(ShellError::Continue { span }) => Some(Value::nothing(span)), - Err(ShellError::Break { .. }) => None, - Err(error) => { - let error = chain_error_with_input(error, is_error, span); - Some(Value::error(error, span)) + let span = value.span(); + let is_error = value.is_error(); + match closure + .run_with_value(value) + .and_then(|data| data.into_value(head)) + { + Ok(value) => Some(value), + Err(ShellError::Continue { span }) => Some(Value::nothing(span)), + Err(ShellError::Break { .. }) => None, + Err(error) => { + let error = chain_error_with_input(error, is_error, span); + Some(Value::error(error, span)) + } } - } - }) - .into_pipeline_data(head, engine_state.ctrlc.clone())) + }) + .into_pipeline_data(head, engine_state.ctrlc.clone())) + } else { + Ok(PipelineData::Empty) + } } // This match allows non-iterables to be accepted, // which is currently considered undesirable (Nov 2022). diff --git a/crates/nu-command/src/filters/empty.rs b/crates/nu-command/src/filters/empty.rs index fd55921414..f4dd428b77 100644 --- a/crates/nu-command/src/filters/empty.rs +++ b/crates/nu-command/src/filters/empty.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use std::io::Read; pub fn empty( engine_state: &EngineState, @@ -36,29 +37,26 @@ pub fn empty( } else { match input { PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::ExternalStream { stdout, .. } => match stdout { - Some(s) => { - let bytes = s.into_bytes(); - - match bytes { - Ok(s) => { - if negate { - Ok(Value::bool(!s.item.is_empty(), head).into_pipeline_data()) - } else { - Ok(Value::bool(s.item.is_empty(), head).into_pipeline_data()) - } + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + match stream.reader() { + Some(reader) => { + let is_empty = reader.bytes().next().transpose().err_span(span)?.is_none(); + if negate { + Ok(Value::bool(!is_empty, head).into_pipeline_data()) + } else { + Ok(Value::bool(is_empty, head).into_pipeline_data()) + } + } + None => { + if negate { + Ok(Value::bool(false, head).into_pipeline_data()) + } else { + Ok(Value::bool(true, head).into_pipeline_data()) } - Err(err) => Err(err), } } - None => { - if negate { - Ok(Value::bool(false, head).into_pipeline_data()) - } else { - Ok(Value::bool(true, head).into_pipeline_data()) - } - } - }, + } PipelineData::ListStream(s, ..) => { let empty = s.into_iter().next().is_none(); if negate { diff --git a/crates/nu-command/src/filters/filter.rs b/crates/nu-command/src/filters/filter.rs index b158dd3be3..1ba1508839 100644 --- a/crates/nu-command/src/filters/filter.rs +++ b/crates/nu-command/src/filters/filter.rs @@ -58,33 +58,13 @@ a variable. On the other hand, the "row condition" syntax is not supported."# | PipelineData::ListStream(..) => { let mut closure = ClosureEval::new(engine_state, stack, closure); Ok(input - .into_iter() - .filter_map(move |value| match closure.run_with_value(value.clone()) { - Ok(pred) => pred.into_value(head).is_true().then_some(value), - Err(err) => { - let span = value.span(); - let err = chain_error_with_input(err, value.is_error(), span); - Some(Value::error(err, span)) - } - }) - .into_pipeline_data(head, engine_state.ctrlc.clone())) - } - PipelineData::ExternalStream { stdout: None, .. 
} => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - let mut closure = ClosureEval::new(engine_state, stack, closure); - Ok(stream .into_iter() .filter_map(move |value| { - let value = match value { - Ok(value) => value, - Err(err) => return Some(Value::error(err, head)), - }; - - match closure.run_with_value(value.clone()) { - Ok(pred) => pred.into_value(head).is_true().then_some(value), + match closure + .run_with_value(value.clone()) + .and_then(|data| data.into_value(head)) + { + Ok(cond) => cond.is_true().then_some(value), Err(err) => { let span = value.span(); let err = chain_error_with_input(err, value.is_error(), span); @@ -94,14 +74,43 @@ a variable. On the other hand, the "row condition" syntax is not supported."# }) .into_pipeline_data(head, engine_state.ctrlc.clone())) } + PipelineData::ByteStream(stream, ..) => { + if let Some(chunks) = stream.chunks() { + let mut closure = ClosureEval::new(engine_state, stack, closure); + Ok(chunks + .into_iter() + .filter_map(move |value| { + let value = match value { + Ok(value) => value, + Err(err) => return Some(Value::error(err, head)), + }; + + match closure + .run_with_value(value.clone()) + .and_then(|data| data.into_value(head)) + { + Ok(cond) => cond.is_true().then_some(value), + Err(err) => { + let span = value.span(); + let err = chain_error_with_input(err, value.is_error(), span); + Some(Value::error(err, span)) + } + } + }) + .into_pipeline_data(head, engine_state.ctrlc.clone())) + } else { + Ok(PipelineData::Empty) + } + } // This match allows non-iterables to be accepted, // which is currently considered undesirable (Nov 2022). PipelineData::Value(value, ..) => { let result = ClosureEvalOnce::new(engine_state, stack, closure) - .run_with_value(value.clone()); + .run_with_value(value.clone()) + .and_then(|data| data.into_value(head)); Ok(match result { - Ok(pred) => pred.into_value(head).is_true().then_some(value), + Ok(cond) => cond.is_true().then_some(value), Err(err) => { let span = value.span(); let err = chain_error_with_input(err, value.is_error(), span); diff --git a/crates/nu-command/src/filters/find.rs b/crates/nu-command/src/filters/find.rs index b45fe8d810..dfdef66969 100644 --- a/crates/nu-command/src/filters/find.rs +++ b/crates/nu-command/src/filters/find.rs @@ -447,57 +447,35 @@ fn find_with_rest_and_highlight( Ok(PipelineData::ListStream(stream, metadata)) } - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - let mut output: Vec = vec![]; - for filter_val in stream { - match filter_val { - Ok(value) => { - let span = value.span(); - match value { - Value::String { val, .. } => { - let split_char = if val.contains("\r\n") { "\r\n" } else { "\n" }; + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + if let Some(lines) = stream.lines() { + let terms = lower_terms + .into_iter() + .map(|term| term.to_expanded_string("", &filter_config).to_lowercase()) + .collect::>(); - for line in val.split(split_char) { - for term in lower_terms.iter() { - let term_str = term.to_expanded_string("", &filter_config); - let lower_val = line.to_lowercase(); - if lower_val.contains( - &term.to_expanded_string("", &config).to_lowercase(), - ) { - output.push(Value::string( - highlight_search_string( - line, - &term_str, - &string_style, - &highlight_style, - )?, - span, - )) - } - } - } - } - // Propagate errors by explicitly matching them before the final case. 
- Value::Error { error, .. } => return Err(*error), - other => { - return Err(ShellError::UnsupportedInput { - msg: "unsupported type from raw stream".into(), - input: format!("input: {:?}", other.get_type()), - msg_span: span, - input_span: other.span(), - }); - } + let mut output: Vec = vec![]; + for line in lines { + let line = line?.to_lowercase(); + for term in &terms { + if line.contains(term) { + output.push(Value::string( + highlight_search_string( + &line, + term, + &string_style, + &highlight_style, + )?, + span, + )) } } - // Propagate any errors that were in the stream - Err(e) => return Err(e), - }; + } + Ok(Value::list(output, span).into_pipeline_data()) + } else { + Ok(PipelineData::Empty) } - Ok(output.into_pipeline_data(span, ctrlc)) } } } diff --git a/crates/nu-command/src/filters/first.rs b/crates/nu-command/src/filters/first.rs index 1bc51f2562..e581c3e84d 100644 --- a/crates/nu-command/src/filters/first.rs +++ b/crates/nu-command/src/filters/first.rs @@ -170,11 +170,11 @@ fn first_helper( )) } } - PipelineData::ExternalStream { span, .. } => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), - wrong_type: "raw data".into(), + wrong_type: "byte stream".into(), dst_span: head, - src_span: span, + src_span: stream.span(), }), PipelineData::Empty => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), diff --git a/crates/nu-command/src/filters/get.rs b/crates/nu-command/src/filters/get.rs index a481372db1..07f0ea9440 100644 --- a/crates/nu-command/src/filters/get.rs +++ b/crates/nu-command/src/filters/get.rs @@ -81,7 +81,7 @@ If multiple cell paths are given, this will produce a list of values."# let paths = std::iter::once(cell_path).chain(rest); - let input = input.into_value(span); + let input = input.into_value(span)?; for path in paths { let val = input.clone().follow_cell_path(&path.members, !sensitive); diff --git a/crates/nu-command/src/filters/group_by.rs b/crates/nu-command/src/filters/group_by.rs index 24559c1eca..c1d76ebe08 100644 --- a/crates/nu-command/src/filters/group_by.rs +++ b/crates/nu-command/src/filters/group_by.rs @@ -207,7 +207,7 @@ fn group_closure( for value in values { let key = closure .run_with_value(value.clone())? - .into_value(span) + .into_value(span)? .coerce_into_string()?; groups.entry(key).or_default().push(value); diff --git a/crates/nu-command/src/filters/headers.rs b/crates/nu-command/src/filters/headers.rs index d7492d0b76..6e63c33ff9 100644 --- a/crates/nu-command/src/filters/headers.rs +++ b/crates/nu-command/src/filters/headers.rs @@ -66,7 +66,7 @@ impl Command for Headers { let config = engine_state.get_config(); let metadata = input.metadata(); let span = input.span().unwrap_or(call.head); - let value = input.into_value(span); + let value = input.into_value(span)?; let Value::List { vals: table, .. } = value else { return Err(ShellError::TypeMismatch { err_message: "not a table".to_string(), diff --git a/crates/nu-command/src/filters/insert.rs b/crates/nu-command/src/filters/insert.rs index d9fb165a16..e8794304c8 100644 --- a/crates/nu-command/src/filters/insert.rs +++ b/crates/nu-command/src/filters/insert.rs @@ -190,7 +190,7 @@ fn insert( let value = value.unwrap_or(Value::nothing(head)); let new_value = ClosureEvalOnce::new(engine_state, stack, *val) .run_with_value(value.clone())? 
- .into_value(head); + .into_value(head)?; pre_elems.push(new_value); if !end_of_stream { @@ -261,8 +261,8 @@ fn insert( type_name: "empty pipeline".to_string(), span: head, }), - PipelineData::ExternalStream { .. } => Err(ShellError::IncompatiblePathAccess { - type_name: "external stream".to_string(), + PipelineData::ByteStream(..) => Err(ShellError::IncompatiblePathAccess { + type_name: "byte stream".to_string(), span: head, }), } @@ -284,7 +284,7 @@ fn insert_value_by_closure( value.clone() }; - let new_value = closure.run_with_value(value_at_path)?.into_value(span); + let new_value = closure.run_with_value(value_at_path)?.into_value(span)?; value.insert_data_at_cell_path(cell_path, new_value, span) } @@ -304,7 +304,7 @@ fn insert_single_value_by_closure( value.clone() }; - let new_value = closure.run_with_value(value_at_path)?.into_value(span); + let new_value = closure.run_with_value(value_at_path)?.into_value(span)?; value.insert_data_at_cell_path(cell_path, new_value, span) } diff --git a/crates/nu-command/src/filters/items.rs b/crates/nu-command/src/filters/items.rs index f0cba01888..6afc0bc536 100644 --- a/crates/nu-command/src/filters/items.rs +++ b/crates/nu-command/src/filters/items.rs @@ -55,10 +55,11 @@ impl Command for Items { let result = closure .add_arg(Value::string(col, span)) .add_arg(val) - .run_with_input(PipelineData::Empty); + .run_with_input(PipelineData::Empty) + .and_then(|data| data.into_value(head)); match result { - Ok(data) => Some(data.into_value(head)), + Ok(value) => Some(value), Err(ShellError::Break { .. }) => None, Err(err) => { let err = chain_error_with_input(err, false, span); @@ -77,20 +78,18 @@ impl Command for Items { }), } } - PipelineData::ListStream(..) => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ListStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "record".into(), wrong_type: "stream".into(), - dst_span: head, - src_span: head, + dst_span: call.head, + src_span: stream.span(), + }), + PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "record".into(), + wrong_type: "byte stream".into(), + dst_span: call.head, + src_span: stream.span(), }), - PipelineData::ExternalStream { span, .. } => { - Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "record".into(), - wrong_type: "raw data".into(), - dst_span: head, - src_span: span, - }) - } } .map(|data| data.set_metadata(metadata)) } diff --git a/crates/nu-command/src/filters/join.rs b/crates/nu-command/src/filters/join.rs index 343cc0eb19..f5e6d63deb 100644 --- a/crates/nu-command/src/filters/join.rs +++ b/crates/nu-command/src/filters/join.rs @@ -75,7 +75,7 @@ impl Command for Join { let join_type = join_type(engine_state, stack, call)?; // FIXME: we should handle ListStreams properly instead of collecting - let collected_input = input.into_value(span); + let collected_input = input.into_value(span)?; match (&collected_input, &table_2, &l_on, &r_on) { ( diff --git a/crates/nu-command/src/filters/last.rs b/crates/nu-command/src/filters/last.rs index f41b7c7e4d..7530126c26 100644 --- a/crates/nu-command/src/filters/last.rs +++ b/crates/nu-command/src/filters/last.rs @@ -160,14 +160,12 @@ impl Command for Last { }), } } - PipelineData::ExternalStream { span, .. } => { - Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary or range".into(), - wrong_type: "raw data".into(), - dst_span: head, - src_span: span, - }) - } + PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary or range".into(), + wrong_type: "byte stream".into(), + dst_span: head, + src_span: stream.span(), + }), PipelineData::Empty => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), wrong_type: "null".into(), diff --git a/crates/nu-command/src/filters/lines.rs b/crates/nu-command/src/filters/lines.rs index e3e0f5d9fd..0b037dcaac 100644 --- a/crates/nu-command/src/filters/lines.rs +++ b/crates/nu-command/src/filters/lines.rs @@ -1,6 +1,4 @@ use nu_engine::command_prelude::*; -use nu_protocol::RawStream; -use std::collections::VecDeque; #[derive(Clone)] pub struct Lines; @@ -33,23 +31,33 @@ impl Command for Lines { let span = input.span().unwrap_or(call.head); match input { - PipelineData::Value(Value::String { val, .. }, ..) => { - let lines = if skip_empty { - val.lines() - .filter_map(|s| { - if s.trim().is_empty() { - None - } else { - Some(Value::string(s, span)) - } - }) - .collect() - } else { - val.lines().map(|s| Value::string(s, span)).collect() - }; + PipelineData::Value(value, ..) => match value { + Value::String { val, .. } => { + let lines = if skip_empty { + val.lines() + .filter_map(|s| { + if s.trim().is_empty() { + None + } else { + Some(Value::string(s, span)) + } + }) + .collect() + } else { + val.lines().map(|s| Value::string(s, span)).collect() + }; - Ok(Value::list(lines, span).into_pipeline_data()) - } + Ok(Value::list(lines, span).into_pipeline_data()) + } + // Propagate existing errors + Value::Error { error, .. } => Err(*error), + value => Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "string or byte stream".into(), + wrong_type: value.get_type().to_string(), + dst_span: head, + src_span: value.span(), + }), + }, PipelineData::Empty => Ok(PipelineData::Empty), PipelineData::ListStream(stream, metadata) => { let stream = stream.modify(|iter| { @@ -76,27 +84,18 @@ impl Command for Lines { Ok(PipelineData::ListStream(stream, metadata)) } - PipelineData::Value(val, ..) => { - match val { - // Propagate existing errors - Value::Error { error, .. } => Err(*error), - _ => Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "string or raw data".into(), - wrong_type: val.get_type().to_string(), - dst_span: head, - src_span: val.span(), - }), + PipelineData::ByteStream(stream, ..) => { + if let Some(lines) = stream.lines() { + Ok(lines + .map(move |line| match line { + Ok(line) => Value::string(line, head), + Err(err) => Value::error(err, head), + }) + .into_pipeline_data(head, ctrlc)) + } else { + Ok(PipelineData::empty()) } } - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - metadata, - .. 
- } => Ok(RawStreamLinesAdapter::new(stream, head, skip_empty) - .map(move |x| x.unwrap_or_else(|err| Value::error(err, head))) - .into_pipeline_data(head, ctrlc) - .set_metadata(metadata)), } } @@ -112,108 +111,6 @@ impl Command for Lines { } } -#[derive(Debug)] -struct RawStreamLinesAdapter { - inner: RawStream, - inner_complete: bool, - skip_empty: bool, - span: Span, - incomplete_line: String, - queue: VecDeque, -} - -impl Iterator for RawStreamLinesAdapter { - type Item = Result; - - fn next(&mut self) -> Option { - loop { - if let Some(s) = self.queue.pop_front() { - if self.skip_empty && s.trim().is_empty() { - continue; - } - return Some(Ok(Value::string(s, self.span))); - } else { - // inner is complete, feed out remaining state - if self.inner_complete { - return if self.incomplete_line.is_empty() { - None - } else { - Some(Ok(Value::string( - std::mem::take(&mut self.incomplete_line), - self.span, - ))) - }; - } - - // pull more data from inner - if let Some(result) = self.inner.next() { - match result { - Ok(v) => { - let span = v.span(); - match v { - // TODO: Value::Binary support required? - Value::String { val, .. } => { - self.span = span; - - let mut lines = val.lines(); - - // handle incomplete line from previous - if !self.incomplete_line.is_empty() { - if let Some(first) = lines.next() { - self.incomplete_line.push_str(first); - self.queue.push_back(std::mem::take( - &mut self.incomplete_line, - )); - } - } - - // save completed lines - self.queue.extend(lines.map(String::from)); - - if !val.ends_with('\n') { - // incomplete line, save for next time - // if `val` and `incomplete_line` were empty, - // then pop will return none - if let Some(s) = self.queue.pop_back() { - self.incomplete_line = s; - } - } - } - // Propagate errors by explicitly matching them before the final case. - Value::Error { error, .. 
} => return Some(Err(*error)), - other => { - return Some(Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "string".into(), - wrong_type: other.get_type().to_string(), - dst_span: self.span, - src_span: other.span(), - })); - } - } - } - Err(err) => return Some(Err(err)), - } - } else { - self.inner_complete = true; - } - } - } - } -} - -impl RawStreamLinesAdapter { - pub fn new(inner: RawStream, span: Span, skip_empty: bool) -> Self { - Self { - inner, - span, - skip_empty, - incomplete_line: String::new(), - queue: VecDeque::new(), - inner_complete: false, - } - } -} - #[cfg(test)] mod test { use super::*; diff --git a/crates/nu-command/src/filters/par_each.rs b/crates/nu-command/src/filters/par_each.rs index 52c3024270..af72895df6 100644 --- a/crates/nu-command/src/filters/par_each.rs +++ b/crates/nu-command/src/filters/par_each.rs @@ -143,17 +143,16 @@ impl Command for ParEach { .map(move |(index, value)| { let span = value.span(); let is_error = value.is_error(); - let result = + let value = ClosureEvalOnce::new(engine_state, stack, closure.clone()) - .run_with_value(value); - - let value = match result { - Ok(data) => data.into_value(span), - Err(err) => Value::error( - chain_error_with_input(err, is_error, span), - span, - ), - }; + .run_with_value(value) + .and_then(|data| data.into_value(span)) + .unwrap_or_else(|err| { + Value::error( + chain_error_with_input(err, is_error, span), + span, + ) + }); (index, value) }) @@ -170,17 +169,16 @@ impl Command for ParEach { .map(move |(index, value)| { let span = value.span(); let is_error = value.is_error(); - let result = + let value = ClosureEvalOnce::new(engine_state, stack, closure.clone()) - .run_with_value(value); - - let value = match result { - Ok(data) => data.into_value(span), - Err(err) => Value::error( - chain_error_with_input(err, is_error, span), - span, - ), - }; + .run_with_value(value) + .and_then(|data| data.into_value(span)) + .unwrap_or_else(|err| { + Value::error( + chain_error_with_input(err, is_error, span), + span, + ) + }); (index, value) }) @@ -203,40 +201,12 @@ impl Command for ParEach { .map(move |(index, value)| { let span = value.span(); let is_error = value.is_error(); - let result = ClosureEvalOnce::new(engine_state, stack, closure.clone()) - .run_with_value(value); - - let value = match result { - Ok(data) => data.into_value(head), - Err(err) => { - Value::error(chain_error_with_input(err, is_error, span), span) - } - }; - - (index, value) - }) - .collect::>(); - - apply_order(vec).into_pipeline_data(head, engine_state.ctrlc.clone()) - })), - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => Ok(create_pool(max_threads)?.install(|| { - let vec = stream - .enumerate() - .par_bridge() - .map(move |(index, value)| { - let value = match value { - Ok(value) => value, - Err(err) => return (index, Value::error(err, head)), - }; - let value = ClosureEvalOnce::new(engine_state, stack, closure.clone()) .run_with_value(value) - .map(|data| data.into_value(head)) - .unwrap_or_else(|err| Value::error(err, head)); + .and_then(|data| data.into_value(head)) + .unwrap_or_else(|err| { + Value::error(chain_error_with_input(err, is_error, span), span) + }); (index, value) }) @@ -244,6 +214,34 @@ impl Command for ParEach { apply_order(vec).into_pipeline_data(head, engine_state.ctrlc.clone()) })), + PipelineData::ByteStream(stream, ..) 
=> { + if let Some(chunks) = stream.chunks() { + Ok(create_pool(max_threads)?.install(|| { + let vec = chunks + .enumerate() + .par_bridge() + .map(move |(index, value)| { + let value = match value { + Ok(value) => value, + Err(err) => return (index, Value::error(err, head)), + }; + + let value = + ClosureEvalOnce::new(engine_state, stack, closure.clone()) + .run_with_value(value) + .and_then(|data| data.into_value(head)) + .unwrap_or_else(|err| Value::error(err, head)); + + (index, value) + }) + .collect::>(); + + apply_order(vec).into_pipeline_data(head, engine_state.ctrlc.clone()) + })) + } else { + Ok(PipelineData::empty()) + } + } } .and_then(|x| x.filter(|v| !v.is_nothing(), engine_state.ctrlc.clone())) .map(|data| data.set_metadata(metadata)) diff --git a/crates/nu-command/src/filters/reduce.rs b/crates/nu-command/src/filters/reduce.rs index 756fe051a9..fc808ca9af 100644 --- a/crates/nu-command/src/filters/reduce.rs +++ b/crates/nu-command/src/filters/reduce.rs @@ -115,7 +115,7 @@ impl Command for Reduce { .add_arg(value) .add_arg(acc) .run_with_input(PipelineData::Empty)? - .into_value(head); + .into_value(head)?; } Ok(acc.with_span(head).into_pipeline_data()) diff --git a/crates/nu-command/src/filters/reject.rs b/crates/nu-command/src/filters/reject.rs index 251e92c905..f8583d3f47 100644 --- a/crates/nu-command/src/filters/reject.rs +++ b/crates/nu-command/src/filters/reject.rs @@ -173,7 +173,7 @@ fn reject( ) -> Result { let mut unique_rows: HashSet = HashSet::new(); let metadata = input.metadata(); - let val = input.into_value(span); + let val = input.into_value(span)?; let mut val = val; let mut new_columns = vec![]; let mut new_rows = vec![]; diff --git a/crates/nu-command/src/filters/skip/skip_.rs b/crates/nu-command/src/filters/skip/skip_.rs index 1919263aa3..9048b34a58 100644 --- a/crates/nu-command/src/filters/skip/skip_.rs +++ b/crates/nu-command/src/filters/skip/skip_.rs @@ -87,15 +87,14 @@ impl Command for Skip { let ctrlc = engine_state.ctrlc.clone(); let input_span = input.span().unwrap_or(call.head); match input { - PipelineData::ExternalStream { .. } => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), - wrong_type: "raw data".into(), + wrong_type: "byte stream".into(), dst_span: call.head, - src_span: input_span, + src_span: stream.span(), }), PipelineData::Value(Value::Binary { val, .. 
}, metadata) => { let bytes = val.into_iter().skip(n).collect::>(); - Ok(Value::binary(bytes, input_span).into_pipeline_data_with_metadata(metadata)) } _ => Ok(input diff --git a/crates/nu-command/src/filters/skip/skip_until.rs b/crates/nu-command/src/filters/skip/skip_until.rs index 74deeda84d..bb36785e00 100644 --- a/crates/nu-command/src/filters/skip/skip_until.rs +++ b/crates/nu-command/src/filters/skip/skip_until.rs @@ -85,7 +85,8 @@ impl Command for SkipUntil { .skip_while(move |value| { closure .run_with_value(value.clone()) - .map(|data| data.into_value(head).is_false()) + .and_then(|data| data.into_value(head)) + .map(|cond| cond.is_false()) .unwrap_or(false) }) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) diff --git a/crates/nu-command/src/filters/skip/skip_while.rs b/crates/nu-command/src/filters/skip/skip_while.rs index a832d8f7b1..2747ea6f97 100644 --- a/crates/nu-command/src/filters/skip/skip_while.rs +++ b/crates/nu-command/src/filters/skip/skip_while.rs @@ -90,7 +90,8 @@ impl Command for SkipWhile { .skip_while(move |value| { closure .run_with_value(value.clone()) - .map(|data| data.into_value(head).is_true()) + .and_then(|data| data.into_value(head)) + .map(|cond| cond.is_true()) .unwrap_or(false) }) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) diff --git a/crates/nu-command/src/filters/take/take_.rs b/crates/nu-command/src/filters/take/take_.rs index 01700420b8..12840aa8d6 100644 --- a/crates/nu-command/src/filters/take/take_.rs +++ b/crates/nu-command/src/filters/take/take_.rs @@ -78,14 +78,12 @@ impl Command for Take { stream.modify(|iter| iter.take(rows_desired)), metadata, )), - PipelineData::ExternalStream { span, .. } => { - Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary or range".into(), - wrong_type: "raw data".into(), - dst_span: head, - src_span: span, - }) - } + PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary or range".into(), + wrong_type: "byte stream".into(), + dst_span: head, + src_span: stream.span(), + }), PipelineData::Empty => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), wrong_type: "null".into(), diff --git a/crates/nu-command/src/filters/take/take_until.rs b/crates/nu-command/src/filters/take/take_until.rs index e3a2a37162..0df2407cb1 100644 --- a/crates/nu-command/src/filters/take/take_until.rs +++ b/crates/nu-command/src/filters/take/take_until.rs @@ -81,7 +81,8 @@ impl Command for TakeUntil { .take_while(move |value| { closure .run_with_value(value.clone()) - .map(|data| data.into_value(head).is_false()) + .and_then(|data| data.into_value(head)) + .map(|cond| cond.is_false()) .unwrap_or(false) }) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) diff --git a/crates/nu-command/src/filters/take/take_while.rs b/crates/nu-command/src/filters/take/take_while.rs index 632c165847..7c282ac38a 100644 --- a/crates/nu-command/src/filters/take/take_while.rs +++ b/crates/nu-command/src/filters/take/take_while.rs @@ -81,7 +81,8 @@ impl Command for TakeWhile { .take_while(move |value| { closure .run_with_value(value.clone()) - .map(|data| data.into_value(head).is_true()) + .and_then(|data| data.into_value(head)) + .map(|cond| cond.is_true()) .unwrap_or(false) }) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) diff --git a/crates/nu-command/src/filters/tee.rs b/crates/nu-command/src/filters/tee.rs index 319f70905c..936dee5c79 100644 --- a/crates/nu-command/src/filters/tee.rs +++ b/crates/nu-command/src/filters/tee.rs @@ -1,6 +1,17 @@ use nu_engine::{command_prelude::*, get_eval_block_with_early_return}; -use nu_protocol::{engine::Closure, OutDest, RawStream}; -use std::{sync::mpsc, thread}; +use nu_protocol::{ + byte_stream::copy_with_interrupt, engine::Closure, process::ChildPipe, ByteStream, + ByteStreamSource, OutDest, +}; +use std::{ + io::{self, Read, Write}, + sync::{ + atomic::AtomicBool, + mpsc::{self, Sender}, + Arc, + }, + thread::{self, JoinHandle}, +}; #[derive(Clone)] pub struct Tee; @@ -67,138 +78,205 @@ use it in your pipeline."# let head = call.head; let use_stderr = call.has_flag(engine_state, stack, "stderr")?; - let Spanned { - item: Closure { block_id, captures }, - span: closure_span, - } = call.req(engine_state, stack, 0)?; + let closure: Spanned = call.req(engine_state, stack, 0)?; + let closure_span = closure.span; + let closure = closure.item; - let closure_engine_state = engine_state.clone(); - let mut closure_stack = stack - .captures_to_stack_preserve_out_dest(captures) - .reset_pipes(); + let mut eval_block = { + let closure_engine_state = engine_state.clone(); + let mut closure_stack = stack + .captures_to_stack_preserve_out_dest(closure.captures) + .reset_pipes(); + let eval_block_with_early_return = get_eval_block_with_early_return(engine_state); - let metadata = input.metadata(); - let metadata_clone = metadata.clone(); + move |input| { + let result = eval_block_with_early_return( + &closure_engine_state, + &mut closure_stack, + closure_engine_state.get_block(closure.block_id), + input, + ); + // Make sure to drain any iterator produced to avoid unexpected behavior + result.and_then(|data| data.drain().map(|_| ())) + } + }; - let eval_block_with_early_return = get_eval_block_with_early_return(engine_state); + if let PipelineData::ByteStream(stream, metadata) = input { + let span = 
stream.span(); + let ctrlc = engine_state.ctrlc.clone(); + let eval_block = { + let metadata = metadata.clone(); + move |stream| eval_block(PipelineData::ByteStream(stream, metadata)) + }; - match input { - // Handle external streams specially, to make sure they pass through - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - } => { - let known_size = if use_stderr { - stderr.as_ref().and_then(|s| s.known_size) - } else { - stdout.as_ref().and_then(|s| s.known_size) - }; + match stream.into_source() { + ByteStreamSource::Read(read) => { + if use_stderr { + return stderr_misuse(span, head); + } - let with_stream = move |rx: mpsc::Receiver, ShellError>>| { - let iter = rx.into_iter(); - let input_from_channel = PipelineData::ExternalStream { - stdout: Some(RawStream::new( - Box::new(iter), - closure_engine_state.ctrlc.clone(), - span, - known_size, - )), - stderr: None, - exit_code: None, - span, - metadata: metadata_clone, - trim_end_newline, + let tee = IoTee::new(read, span, eval_block)?; + + Ok(PipelineData::ByteStream( + ByteStream::read(tee, span, ctrlc), + metadata, + )) + } + ByteStreamSource::File(file) => { + if use_stderr { + return stderr_misuse(span, head); + } + + let tee = IoTee::new(file, span, eval_block)?; + + Ok(PipelineData::ByteStream( + ByteStream::read(tee, span, ctrlc), + metadata, + )) + } + ByteStreamSource::Child(mut child) => { + let stderr_thread = if use_stderr { + let stderr_thread = if let Some(stderr) = child.stderr.take() { + match stack.stderr() { + OutDest::Pipe | OutDest::Capture => { + let tee = IoTee::new(stderr, span, eval_block)?; + child.stderr = Some(ChildPipe::Tee(Box::new(tee))); + None + } + OutDest::Null => Some(tee_pipe_on_thread( + stderr, + io::sink(), + span, + ctrlc.as_ref(), + eval_block, + )?), + OutDest::Inherit => Some(tee_pipe_on_thread( + stderr, + io::stderr(), + span, + ctrlc.as_ref(), + eval_block, + )?), + OutDest::File(file) => Some(tee_pipe_on_thread( + stderr, + file.clone(), + span, + ctrlc.as_ref(), + eval_block, + )?), + } + } else { + None + }; + + if let Some(stdout) = child.stdout.take() { + match stack.stdout() { + OutDest::Pipe | OutDest::Capture => { + child.stdout = Some(stdout); + Ok(()) + } + OutDest::Null => { + copy_pipe(stdout, io::sink(), span, ctrlc.as_deref()) + } + OutDest::Inherit => { + copy_pipe(stdout, io::stdout(), span, ctrlc.as_deref()) + } + OutDest::File(file) => { + copy_pipe(stdout, file.as_ref(), span, ctrlc.as_deref()) + } + }?; + } + + stderr_thread + } else { + let stderr_thread = if let Some(stderr) = child.stderr.take() { + match stack.stderr() { + OutDest::Pipe | OutDest::Capture => { + child.stderr = Some(stderr); + Ok(None) + } + OutDest::Null => { + copy_pipe_on_thread(stderr, io::sink(), span, ctrlc.as_ref()) + .map(Some) + } + OutDest::Inherit => { + copy_pipe_on_thread(stderr, io::stderr(), span, ctrlc.as_ref()) + .map(Some) + } + OutDest::File(file) => { + copy_pipe_on_thread(stderr, file.clone(), span, ctrlc.as_ref()) + .map(Some) + } + }? 
+ } else { + None + }; + + if let Some(stdout) = child.stdout.take() { + match stack.stdout() { + OutDest::Pipe | OutDest::Capture => { + let tee = IoTee::new(stdout, span, eval_block)?; + child.stdout = Some(ChildPipe::Tee(Box::new(tee))); + Ok(()) + } + OutDest::Null => { + tee_pipe(stdout, io::sink(), span, ctrlc.as_deref(), eval_block) + } + OutDest::Inherit => tee_pipe( + stdout, + io::stdout(), + span, + ctrlc.as_deref(), + eval_block, + ), + OutDest::File(file) => tee_pipe( + stdout, + file.as_ref(), + span, + ctrlc.as_deref(), + eval_block, + ), + }?; + } + + stderr_thread }; - let result = eval_block_with_early_return( - &closure_engine_state, - &mut closure_stack, - closure_engine_state.get_block(block_id), - input_from_channel, - ); - // Make sure to drain any iterator produced to avoid unexpected behavior - result.and_then(|data| data.drain()) - }; - if use_stderr { - let stderr = stderr - .map(|stderr| { - let iter = tee(stderr.stream, with_stream).err_span(head)?; - Ok::<_, ShellError>(RawStream::new( - Box::new(iter.map(flatten_result)), - stderr.ctrlc, - stderr.span, - stderr.known_size, - )) - }) - .transpose()?; - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - }) - } else { - let stdout = stdout - .map(|stdout| { - let iter = tee(stdout.stream, with_stream).err_span(head)?; - Ok::<_, ShellError>(RawStream::new( - Box::new(iter.map(flatten_result)), - stdout.ctrlc, - stdout.span, - stdout.known_size, - )) - }) - .transpose()?; - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - }) + if child.stdout.is_some() || child.stderr.is_some() { + Ok(PipelineData::ByteStream( + ByteStream::child(*child, span), + metadata, + )) + } else { + if let Some(thread) = stderr_thread { + thread.join().unwrap_or_else(|_| Err(panic_error()))?; + } + child.wait()?; + Ok(PipelineData::Empty) + } } } - // --stderr is not allowed if the input is not an external stream - _ if use_stderr => Err(ShellError::UnsupportedInput { - msg: "--stderr can only be used on external streams".into(), - input: "the input to `tee` is not an external stream".into(), - msg_span: head, - input_span: input.span().unwrap_or(head), - }), - // Handle others with the plain iterator - _ => { - let teed = tee(input.into_iter(), move |rx| { - let input_from_channel = rx.into_pipeline_data_with_metadata( - head, - closure_engine_state.ctrlc.clone(), - metadata_clone, - ); - let result = eval_block_with_early_return( - &closure_engine_state, - &mut closure_stack, - closure_engine_state.get_block(block_id), - input_from_channel, - ); - // Make sure to drain any iterator produced to avoid unexpected behavior - result.and_then(|data| data.drain()) - }) - .err_span(head)? - .map(move |result| result.unwrap_or_else(|err| Value::error(err, closure_span))) - .into_pipeline_data_with_metadata( - head, - engine_state.ctrlc.clone(), - metadata, - ); - - Ok(teed) + } else { + if use_stderr { + return stderr_misuse(input.span().unwrap_or(head), head); } + + let span = input.span().unwrap_or(head); + let ctrlc = engine_state.ctrlc.clone(); + let metadata = input.metadata(); + let metadata_clone = metadata.clone(); + + Ok(tee(input.into_iter(), move |rx| { + let input = rx.into_pipeline_data_with_metadata(span, ctrlc, metadata_clone); + eval_block(input) + }) + .err_span(call.head)? 
+ .map(move |result| result.unwrap_or_else(|err| Value::error(err, closure_span))) + .into_pipeline_data_with_metadata( + span, + engine_state.ctrlc.clone(), + metadata, + )) } } @@ -213,10 +291,6 @@ fn panic_error() -> ShellError { } } -fn flatten_result(result: Result, E>) -> Result { - result.unwrap_or_else(Err) -} - /// Copies the iterator to a channel on another thread. If an error is produced on that thread, /// it is embedded in the resulting iterator as an `Err` as soon as possible. When the iterator /// finishes, it waits for the other thread to finish, also handling any error produced at that @@ -233,7 +307,7 @@ where let mut thread = Some( thread::Builder::new() - .name("stderr consumer".into()) + .name("tee".into()) .spawn(move || with_cloned_stream(rx))?, ); @@ -273,6 +347,134 @@ where })) } +fn stderr_misuse(span: Span, head: Span) -> Result { + Err(ShellError::UnsupportedInput { + msg: "--stderr can only be used on external commands".into(), + input: "the input to `tee` is not an external commands".into(), + msg_span: head, + input_span: span, + }) +} + +struct IoTee { + reader: R, + sender: Option>>, + thread: Option>>, +} + +impl IoTee { + fn new( + reader: R, + span: Span, + eval_block: impl FnOnce(ByteStream) -> Result<(), ShellError> + Send + 'static, + ) -> Result { + let (sender, receiver) = mpsc::channel(); + + let thread = thread::Builder::new() + .name("tee".into()) + .spawn(move || eval_block(ByteStream::from_iter(receiver, span, None))) + .err_span(span)?; + + Ok(Self { + reader, + sender: Some(sender), + thread: Some(thread), + }) + } +} + +impl Read for IoTee { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if let Some(thread) = self.thread.take() { + if thread.is_finished() { + if let Err(err) = thread.join().unwrap_or_else(|_| Err(panic_error())) { + return Err(io::Error::new(io::ErrorKind::Other, err)); + } + } else { + self.thread = Some(thread) + } + } + let len = self.reader.read(buf)?; + if len == 0 { + self.sender = None; + if let Some(thread) = self.thread.take() { + if let Err(err) = thread.join().unwrap_or_else(|_| Err(panic_error())) { + return Err(io::Error::new(io::ErrorKind::Other, err)); + } + } + } else if let Some(sender) = self.sender.as_mut() { + if sender.send(buf[..len].to_vec()).is_err() { + self.sender = None; + } + } + Ok(len) + } +} + +fn tee_pipe( + pipe: ChildPipe, + mut dest: impl Write, + span: Span, + ctrlc: Option<&AtomicBool>, + eval_block: impl FnOnce(ByteStream) -> Result<(), ShellError> + Send + 'static, +) -> Result<(), ShellError> { + match pipe { + ChildPipe::Pipe(pipe) => { + let mut tee = IoTee::new(pipe, span, eval_block)?; + copy_with_interrupt(&mut tee, &mut dest, span, ctrlc)?; + } + ChildPipe::Tee(tee) => { + let mut tee = IoTee::new(tee, span, eval_block)?; + copy_with_interrupt(&mut tee, &mut dest, span, ctrlc)?; + } + } + Ok(()) +} + +fn tee_pipe_on_thread( + pipe: ChildPipe, + dest: impl Write + Send + 'static, + span: Span, + ctrlc: Option<&Arc>, + eval_block: impl FnOnce(ByteStream) -> Result<(), ShellError> + Send + 'static, +) -> Result>, ShellError> { + let ctrlc = ctrlc.cloned(); + thread::Builder::new() + .name("stderr tee".into()) + .spawn(move || tee_pipe(pipe, dest, span, ctrlc.as_deref(), eval_block)) + .map_err(|e| e.into_spanned(span).into()) +} + +fn copy_pipe( + pipe: ChildPipe, + mut dest: impl Write, + span: Span, + ctrlc: Option<&AtomicBool>, +) -> Result<(), ShellError> { + match pipe { + ChildPipe::Pipe(mut pipe) => { + copy_with_interrupt(&mut pipe, &mut dest, span, ctrlc)?; + } + 
ChildPipe::Tee(mut tee) => { + copy_with_interrupt(&mut tee, &mut dest, span, ctrlc)?; + } + } + Ok(()) +} + +fn copy_pipe_on_thread( + pipe: ChildPipe, + dest: impl Write + Send + 'static, + span: Span, + ctrlc: Option<&Arc>, +) -> Result>, ShellError> { + let ctrlc = ctrlc.cloned(); + thread::Builder::new() + .name("stderr copier".into()) + .spawn(move || copy_pipe(pipe, dest, span, ctrlc.as_deref())) + .map_err(|e| e.into_spanned(span).into()) +} + #[test] fn tee_copies_values_to_other_thread_and_passes_them_through() { let (tx, rx) = mpsc::channel(); diff --git a/crates/nu-command/src/filters/update.rs b/crates/nu-command/src/filters/update.rs index d963e39995..0d914d2d8e 100644 --- a/crates/nu-command/src/filters/update.rs +++ b/crates/nu-command/src/filters/update.rs @@ -225,8 +225,8 @@ fn update( type_name: "empty pipeline".to_string(), span: head, }), - PipelineData::ExternalStream { .. } => Err(ShellError::IncompatiblePathAccess { - type_name: "external stream".to_string(), + PipelineData::ByteStream(..) => Err(ShellError::IncompatiblePathAccess { + type_name: "byte stream".to_string(), span: head, }), } @@ -250,7 +250,7 @@ fn update_value_by_closure( let new_value = closure .add_arg(arg.clone()) .run_with_input(value_at_path.into_pipeline_data())? - .into_value(span); + .into_value(span)?; value.update_data_at_cell_path(cell_path, new_value) } @@ -273,7 +273,7 @@ fn update_single_value_by_closure( let new_value = closure .add_arg(arg.clone()) .run_with_input(value_at_path.into_pipeline_data())? - .into_value(span); + .into_value(span)?; value.update_data_at_cell_path(cell_path, new_value) } diff --git a/crates/nu-command/src/filters/upsert.rs b/crates/nu-command/src/filters/upsert.rs index 6b62b1d7bc..4313addd89 100644 --- a/crates/nu-command/src/filters/upsert.rs +++ b/crates/nu-command/src/filters/upsert.rs @@ -218,7 +218,7 @@ fn upsert( if let Value::Closure { val, .. } = replacement { ClosureEvalOnce::new(engine_state, stack, *val) .run_with_value(value)? - .into_value(head) + .into_value(head)? } else { replacement } @@ -285,8 +285,8 @@ fn upsert( type_name: "empty pipeline".to_string(), span: head, }), - PipelineData::ExternalStream { .. } => Err(ShellError::IncompatiblePathAccess { - type_name: "external stream".to_string(), + PipelineData::ByteStream(..) => Err(ShellError::IncompatiblePathAccess { + type_name: "byte stream".to_string(), span: head, }), } @@ -311,7 +311,11 @@ fn upsert_value_by_closure( .map(IntoPipelineData::into_pipeline_data) .unwrap_or(PipelineData::Empty); - let new_value = closure.add_arg(arg).run_with_input(input)?.into_value(span); + let new_value = closure + .add_arg(arg) + .run_with_input(input)? + .into_value(span)?; + value.upsert_data_at_cell_path(cell_path, new_value) } @@ -334,7 +338,11 @@ fn upsert_single_value_by_closure( .map(IntoPipelineData::into_pipeline_data) .unwrap_or(PipelineData::Empty); - let new_value = closure.add_arg(arg).run_with_input(input)?.into_value(span); + let new_value = closure + .add_arg(arg) + .run_with_input(input)? 
+ .into_value(span)?; + value.upsert_data_at_cell_path(cell_path, new_value) } diff --git a/crates/nu-command/src/filters/utils.rs b/crates/nu-command/src/filters/utils.rs index 0ef7d916b7..8d9b1300f6 100644 --- a/crates/nu-command/src/filters/utils.rs +++ b/crates/nu-command/src/filters/utils.rs @@ -36,7 +36,7 @@ pub fn boolean_fold( break; } - let pred = closure.run_with_value(value)?.into_value(head).is_true(); + let pred = closure.run_with_value(value)?.into_value(head)?.is_true(); if pred == accumulator { return Ok(Value::bool(accumulator, head).into_pipeline_data()); diff --git a/crates/nu-command/src/filters/values.rs b/crates/nu-command/src/filters/values.rs index aa576de874..ed33ebf643 100644 --- a/crates/nu-command/src/filters/values.rs +++ b/crates/nu-command/src/filters/values.rs @@ -180,13 +180,11 @@ fn values( Err(err) => Err(err), } } - PipelineData::ExternalStream { .. } => Err(ShellError::OnlySupportsThisInputType { + PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "record or table".into(), - wrong_type: "raw data".into(), + wrong_type: "byte stream".into(), dst_span: head, - src_span: input - .span() - .expect("PipelineData::ExternalStream had no span"), + src_span: stream.span(), }), } } diff --git a/crates/nu-command/src/filters/where_.rs b/crates/nu-command/src/filters/where_.rs index 7507a7ede1..fe73de354f 100644 --- a/crates/nu-command/src/filters/where_.rs +++ b/crates/nu-command/src/filters/where_.rs @@ -57,9 +57,14 @@ not supported."# let metadata = input.metadata(); Ok(input .into_iter_strict(head)? - .filter_map(move |value| match closure.run_with_value(value.clone()) { - Ok(data) => data.into_value(head).is_true().then_some(value), - Err(err) => Some(Value::error(err, head)), + .filter_map(move |value| { + match closure + .run_with_value(value.clone()) + .and_then(|data| data.into_value(head)) + { + Ok(cond) => cond.is_true().then_some(value), + Err(err) => Some(Value::error(err, head)), + } }) .into_pipeline_data_with_metadata(head, engine_state.ctrlc.clone(), metadata)) } diff --git a/crates/nu-command/src/filters/wrap.rs b/crates/nu-command/src/filters/wrap.rs index 24ce8e6821..52a0fb22c3 100644 --- a/crates/nu-command/src/filters/wrap.rs +++ b/crates/nu-command/src/filters/wrap.rs @@ -43,8 +43,8 @@ impl Command for Wrap { .into_iter() .map(move |x| Value::record(record! { name.clone() => x }, span)) .into_pipeline_data_with_metadata(span, engine_state.ctrlc.clone(), metadata)), - PipelineData::ExternalStream { .. } => Ok(Value::record( - record! { name => input.into_value(span) }, + PipelineData::ByteStream(stream, ..) => Ok(Value::record( + record! { name => stream.into_value()? 
}, span, ) .into_pipeline_data_with_metadata(metadata)), diff --git a/crates/nu-command/src/formats/from/json.rs b/crates/nu-command/src/formats/from/json.rs index 44f127152f..ea449711c1 100644 --- a/crates/nu-command/src/formats/from/json.rs +++ b/crates/nu-command/src/formats/from/json.rs @@ -59,7 +59,7 @@ impl Command for FromJson { let (string_input, span, metadata) = input.collect_string_strict(span)?; if string_input.is_empty() { - return Ok(PipelineData::new_with_metadata(metadata, span)); + return Ok(Value::nothing(span).into_pipeline_data()); } let strict = call.has_flag(engine_state, stack, "strict")?; diff --git a/crates/nu-command/src/formats/from/msgpack.rs b/crates/nu-command/src/formats/from/msgpack.rs index 75f2be2056..4d8ea5e320 100644 --- a/crates/nu-command/src/formats/from/msgpack.rs +++ b/crates/nu-command/src/formats/from/msgpack.rs @@ -2,9 +2,8 @@ // implementation here is unique. use std::{ - collections::VecDeque, error::Error, - io::{self, Cursor, ErrorKind, Write}, + io::{self, Cursor, ErrorKind}, string::FromUtf8Error, sync::{atomic::AtomicBool, Arc}, }; @@ -12,7 +11,6 @@ use std::{ use byteorder::{BigEndian, ReadBytesExt}; use chrono::{TimeZone, Utc}; use nu_engine::command_prelude::*; -use nu_protocol::RawStream; use rmp::decode::{self as mp, ValueReadError}; /// Max recursion depth @@ -121,12 +119,20 @@ MessagePack: https://msgpack.org/ read_msgpack(Cursor::new(bytes), opts) } // Deserialize from a raw stream directly without having to collect it - PipelineData::ExternalStream { - stdout: Some(raw_stream), - .. - } => read_msgpack(ReadRawStream::new(raw_stream), opts), + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + if let Some(reader) = stream.reader() { + read_msgpack(reader, opts) + } else { + Err(ShellError::PipelineMismatch { + exp_input_type: "binary or byte stream".into(), + dst_span: call.head, + src_span: span, + }) + } + } input => Err(ShellError::PipelineMismatch { - exp_input_type: "binary".into(), + exp_input_type: "binary or byte stream".into(), dst_span: call.head, src_span: input.span().unwrap_or(call.head), }), @@ -483,57 +489,6 @@ where .map_err(|err| ReadError::Io(err, span)) } -/// Adapter to read MessagePack from a `RawStream` -/// -/// TODO: contribute this back to `RawStream` in general, with more polish, if it works -pub(crate) struct ReadRawStream { - pub stream: RawStream, - // Use a `VecDeque` for read efficiency - pub leftover: VecDeque, -} - -impl ReadRawStream { - pub(crate) fn new(mut stream: RawStream) -> ReadRawStream { - ReadRawStream { - leftover: std::mem::take(&mut stream.leftover).into(), - stream, - } - } -} - -impl io::Read for ReadRawStream { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - if buf.is_empty() { - Ok(0) - } else if !self.leftover.is_empty() { - // Take as many leftover bytes as possible - self.leftover.read(buf) - } else { - // Try to get data from the RawStream. 
We have to be careful not to break on a zero-len - // buffer though, since that would mean EOF - loop { - if let Some(result) = self.stream.stream.next() { - let bytes = result.map_err(|err| io::Error::new(ErrorKind::Other, err))?; - if !bytes.is_empty() { - let min_len = bytes.len().min(buf.len()); - let (source, leftover_bytes) = bytes.split_at(min_len); - buf[0..min_len].copy_from_slice(source); - // Keep whatever bytes we couldn't use in the leftover vec - self.leftover.write_all(leftover_bytes)?; - return Ok(min_len); - } else { - // Zero-length buf, continue - continue; - } - } else { - // End of input - return Ok(0); - } - } - } - } -} - /// Return an error if this is not the end of file. /// /// This can help detect if parsing succeeded incorrectly, perhaps due to corruption. diff --git a/crates/nu-command/src/formats/from/msgpackz.rs b/crates/nu-command/src/formats/from/msgpackz.rs index 3200d5d876..7960f3f97a 100644 --- a/crates/nu-command/src/formats/from/msgpackz.rs +++ b/crates/nu-command/src/formats/from/msgpackz.rs @@ -2,7 +2,7 @@ use std::io::Cursor; use nu_engine::command_prelude::*; -use super::msgpack::{read_msgpack, Opts, ReadRawStream}; +use super::msgpack::{read_msgpack, Opts}; const BUFFER_SIZE: usize = 65536; @@ -50,15 +50,21 @@ impl Command for FromMsgpackz { read_msgpack(reader, opts) } // Deserialize from a raw stream directly without having to collect it - PipelineData::ExternalStream { - stdout: Some(raw_stream), - .. - } => { - let reader = brotli::Decompressor::new(ReadRawStream::new(raw_stream), BUFFER_SIZE); - read_msgpack(reader, opts) + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + if let Some(reader) = stream.reader() { + let reader = brotli::Decompressor::new(reader, BUFFER_SIZE); + read_msgpack(reader, opts) + } else { + Err(ShellError::PipelineMismatch { + exp_input_type: "binary or byte stream".into(), + dst_span: call.head, + src_span: span, + }) + } } _ => Err(ShellError::PipelineMismatch { - exp_input_type: "binary".into(), + exp_input_type: "binary or byte stream".into(), dst_span: call.head, src_span: span, }), diff --git a/crates/nu-command/src/formats/from/ods.rs b/crates/nu-command/src/formats/from/ods.rs index fff9e98be6..ff7a76c0ca 100644 --- a/crates/nu-command/src/formats/from/ods.rs +++ b/crates/nu-command/src/formats/from/ods.rs @@ -81,28 +81,32 @@ fn convert_columns(columns: &[Value]) -> Result, ShellError> { } fn collect_binary(input: PipelineData, span: Span) -> Result, ShellError> { - let mut bytes = vec![]; - let mut values = input.into_iter(); + if let PipelineData::ByteStream(stream, ..) = input { + stream.into_bytes() + } else { + let mut bytes = vec![]; + let mut values = input.into_iter(); - loop { - match values.next() { - Some(Value::Binary { val: b, .. }) => { - bytes.extend_from_slice(&b); + loop { + match values.next() { + Some(Value::Binary { val: b, .. }) => { + bytes.extend_from_slice(&b); + } + Some(Value::Error { error, .. }) => return Err(*error), + Some(x) => { + return Err(ShellError::UnsupportedInput { + msg: "Expected binary from pipeline".to_string(), + input: "value originates from here".into(), + msg_span: span, + input_span: x.span(), + }) + } + None => break, } - Some(Value::Error { error, .. 
}) => return Err(*error), - Some(x) => { - return Err(ShellError::UnsupportedInput { - msg: "Expected binary from pipeline".to_string(), - input: "value originates from here".into(), - msg_span: span, - input_span: x.span(), - }) - } - None => break, } - } - Ok(bytes) + Ok(bytes) + } } fn from_ods( diff --git a/crates/nu-command/src/formats/from/xlsx.rs b/crates/nu-command/src/formats/from/xlsx.rs index b54cffe3aa..21e2567b45 100644 --- a/crates/nu-command/src/formats/from/xlsx.rs +++ b/crates/nu-command/src/formats/from/xlsx.rs @@ -82,27 +82,31 @@ fn convert_columns(columns: &[Value]) -> Result, ShellError> { } fn collect_binary(input: PipelineData, span: Span) -> Result, ShellError> { - let mut bytes = vec![]; - let mut values = input.into_iter(); + if let PipelineData::ByteStream(stream, ..) = input { + stream.into_bytes() + } else { + let mut bytes = vec![]; + let mut values = input.into_iter(); - loop { - match values.next() { - Some(Value::Binary { val: b, .. }) => { - bytes.extend_from_slice(&b); + loop { + match values.next() { + Some(Value::Binary { val: b, .. }) => { + bytes.extend_from_slice(&b); + } + Some(x) => { + return Err(ShellError::UnsupportedInput { + msg: "Expected binary from pipeline".to_string(), + input: "value originates from here".into(), + msg_span: span, + input_span: x.span(), + }) + } + None => break, } - Some(x) => { - return Err(ShellError::UnsupportedInput { - msg: "Expected binary from pipeline".to_string(), - input: "value originates from here".into(), - msg_span: span, - input_span: x.span(), - }) - } - None => break, } - } - Ok(bytes) + Ok(bytes) + } } fn from_xlsx( diff --git a/crates/nu-command/src/formats/to/delimited.rs b/crates/nu-command/src/formats/to/delimited.rs index a10f611f60..490983d67b 100644 --- a/crates/nu-command/src/formats/to/delimited.rs +++ b/crates/nu-command/src/formats/to/delimited.rs @@ -150,7 +150,7 @@ pub fn to_delimited_data( span: Span, config: &Config, ) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; let output = match from_value_to_delimited_string(&value, sep, config, span) { Ok(mut x) => { if noheaders { diff --git a/crates/nu-command/src/formats/to/json.rs b/crates/nu-command/src/formats/to/json.rs index 0796abc8dc..c4c87f804f 100644 --- a/crates/nu-command/src/formats/to/json.rs +++ b/crates/nu-command/src/formats/to/json.rs @@ -46,7 +46,7 @@ impl Command for ToJson { let span = call.head; // allow ranges to expand and turn into array let input = input.try_expand_range()?; - let value = input.into_value(span); + let value = input.into_value(span)?; let json_value = value_to_json_value(&value)?; let json_result = if raw { diff --git a/crates/nu-command/src/formats/to/msgpack.rs b/crates/nu-command/src/formats/to/msgpack.rs index a4575f37e4..bfeb428e3e 100644 --- a/crates/nu-command/src/formats/to/msgpack.rs +++ b/crates/nu-command/src/formats/to/msgpack.rs @@ -75,7 +75,7 @@ MessagePack: https://msgpack.org/ input: PipelineData, ) -> Result { let value_span = input.span().unwrap_or(call.head); - let value = input.into_value(value_span); + let value = input.into_value(value_span)?; let mut out = vec![]; write_value(&mut out, &value, 0)?; diff --git a/crates/nu-command/src/formats/to/msgpackz.rs b/crates/nu-command/src/formats/to/msgpackz.rs index a07e1206c1..9168d05018 100644 --- a/crates/nu-command/src/formats/to/msgpackz.rs +++ b/crates/nu-command/src/formats/to/msgpackz.rs @@ -70,7 +70,7 @@ impl Command for ToMsgpackz { .transpose()?; let value_span = 
input.span().unwrap_or(call.head); - let value = input.into_value(value_span); + let value = input.into_value(value_span)?; let mut out_buf = vec![]; let mut out = brotli::CompressorWriter::new( &mut out_buf, diff --git a/crates/nu-command/src/formats/to/nuon.rs b/crates/nu-command/src/formats/to/nuon.rs index e747ac58f6..f40b7b5c1d 100644 --- a/crates/nu-command/src/formats/to/nuon.rs +++ b/crates/nu-command/src/formats/to/nuon.rs @@ -53,7 +53,7 @@ impl Command for ToNuon { }; let span = call.head; - let value = input.into_value(span); + let value = input.into_value(span)?; match nuon::to_nuon(&value, style, Some(span)) { Ok(serde_nuon_string) => { diff --git a/crates/nu-command/src/formats/to/text.rs b/crates/nu-command/src/formats/to/text.rs index 7c12dc2821..7f1d632c13 100644 --- a/crates/nu-command/src/formats/to/text.rs +++ b/crates/nu-command/src/formats/to/text.rs @@ -1,6 +1,12 @@ use chrono_humanize::HumanTime; use nu_engine::command_prelude::*; -use nu_protocol::{format_duration, format_filesize_from_conf, Config, RawStream, ValueIterator}; +use nu_protocol::{format_duration, format_filesize_from_conf, ByteStream, Config}; + +const LINE_ENDING: &str = if cfg!(target_os = "windows") { + "\r\n" +} else { + "\n" +}; #[derive(Clone)] pub struct ToText; @@ -28,39 +34,28 @@ impl Command for ToText { input: PipelineData, ) -> Result { let span = call.head; - let config = engine_state.get_config(); - - let line_ending = if cfg!(target_os = "windows") { - "\r\n" - } else { - "\n" - }; let input = input.try_expand_range()?; - if let PipelineData::ListStream(stream, _) = input { - Ok(PipelineData::ExternalStream { - stdout: Some(RawStream::new( - Box::new(ListStreamIterator { - stream: stream.into_inner(), - separator: line_ending.into(), - config: config.clone(), - }), - engine_state.ctrlc.clone(), - span, - None, - )), - stderr: None, - exit_code: None, - span, - metadata: None, - trim_end_newline: false, - }) - } else { - // FIXME: don't collect! stream the output wherever possible! - // Even if the data is collected when it arrives at `to text`, we should be able to stream it out - let collected_input = local_into_string(input.into_value(span), line_ending, config); - - Ok(Value::string(collected_input, span).into_pipeline_data()) + match input { + PipelineData::Empty => Ok(Value::string(String::new(), span).into_pipeline_data()), + PipelineData::Value(value, ..) 
=> { + let str = local_into_string(value, LINE_ENDING, engine_state.get_config()); + Ok(Value::string(str, span).into_pipeline_data()) + } + PipelineData::ListStream(stream, meta) => { + let span = stream.span(); + let config = engine_state.get_config().clone(); + let iter = stream.into_inner().map(move |value| { + let mut str = local_into_string(value, LINE_ENDING, &config); + str.push_str(LINE_ENDING); + str + }); + Ok(PipelineData::ByteStream( + ByteStream::from_iter(iter, span, engine_state.ctrlc.clone()), + meta, + )) + } + PipelineData::ByteStream(stream, meta) => Ok(PipelineData::ByteStream(stream, meta)), } } @@ -85,26 +80,6 @@ impl Command for ToText { } } -struct ListStreamIterator { - stream: ValueIterator, - separator: String, - config: Config, -} - -impl Iterator for ListStreamIterator { - type Item = Result, ShellError>; - - fn next(&mut self) -> Option { - if let Some(item) = self.stream.next() { - let mut string = local_into_string(item, &self.separator, &self.config); - string.push_str(&self.separator); - Some(Ok(string.as_bytes().to_vec())) - } else { - None - } - } -} - fn local_into_string(value: Value, separator: &str, config: &Config) -> String { let span = value.span(); match value { diff --git a/crates/nu-command/src/formats/to/toml.rs b/crates/nu-command/src/formats/to/toml.rs index 385e9576f3..7423f147dd 100644 --- a/crates/nu-command/src/formats/to/toml.rs +++ b/crates/nu-command/src/formats/to/toml.rs @@ -141,7 +141,7 @@ fn to_toml( input: PipelineData, span: Span, ) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; let toml_value = value_to_toml_value(engine_state, &value, span)?; match toml_value { diff --git a/crates/nu-command/src/formats/to/xml.rs b/crates/nu-command/src/formats/to/xml.rs index 2cfec24470..648094318b 100644 --- a/crates/nu-command/src/formats/to/xml.rs +++ b/crates/nu-command/src/formats/to/xml.rs @@ -132,7 +132,7 @@ impl Job { } fn run(mut self, input: PipelineData, head: Span) -> Result { - let value = input.into_value(head); + let value = input.into_value(head)?; self.write_xml_entry(value, true).and_then(|_| { let b = self.writer.into_inner().into_inner(); diff --git a/crates/nu-command/src/formats/to/yaml.rs b/crates/nu-command/src/formats/to/yaml.rs index d03c886328..bea2dd3381 100644 --- a/crates/nu-command/src/formats/to/yaml.rs +++ b/crates/nu-command/src/formats/to/yaml.rs @@ -95,7 +95,7 @@ pub fn value_to_yaml_value(v: &Value) -> Result { } fn to_yaml(input: PipelineData, head: Span) -> Result { - let value = input.into_value(head); + let value = input.into_value(head)?; let yaml_value = value_to_yaml_value(&value)?; match serde_yaml::to_string(&yaml_value) { diff --git a/crates/nu-command/src/generators/generate.rs b/crates/nu-command/src/generators/generate.rs index 0a01b79c08..3549667ff0 100644 --- a/crates/nu-command/src/generators/generate.rs +++ b/crates/nu-command/src/generators/generate.rs @@ -158,14 +158,16 @@ used as the next argument to the closure, otherwise generation stops. 
} Ok(other) => { - let val = other.into_value(head); - let error = ShellError::GenericError { - error: "Invalid block return".into(), - msg: format!("Expected record, found {}", val.get_type()), - span: Some(val.span()), - help: None, - inner: vec![], - }; + let error = other + .into_value(head) + .map(|val| ShellError::GenericError { + error: "Invalid block return".into(), + msg: format!("Expected record, found {}", val.get_type()), + span: Some(val.span()), + help: None, + inner: vec![], + }) + .unwrap_or_else(|err| err); (Some(Value::error(error, head)), None) } diff --git a/crates/nu-command/src/hash/generic_digest.rs b/crates/nu-command/src/hash/generic_digest.rs index 476915f07d..ab15ccae7a 100644 --- a/crates/nu-command/src/hash/generic_digest.rs +++ b/crates/nu-command/src/hash/generic_digest.rs @@ -1,7 +1,6 @@ use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; - -use std::marker::PhantomData; +use std::{io::Write, marker::PhantomData}; pub trait HashDigest: digest::Digest + Clone { fn name() -> &'static str; @@ -38,7 +37,7 @@ impl CmdArgument for Arguments { impl Command for GenericDigest where - D: HashDigest + Send + Sync + 'static, + D: HashDigest + Write + Send + Sync + 'static, digest::Output: core::fmt::LowerHex, { fn name(&self) -> &str { @@ -81,54 +80,23 @@ where call: &Call, input: PipelineData, ) -> Result { + let head = call.head; let binary = call.has_flag(engine_state, stack, "binary")?; let cell_paths: Vec = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let args = Arguments { binary, cell_paths }; let mut hasher = D::new(); - match input { - PipelineData::ExternalStream { - stdout: Some(stream), - span, - .. - } => { - for item in stream { - match item { - // String and binary data are valid byte patterns - Ok(Value::String { val, .. }) => hasher.update(val.as_bytes()), - Ok(Value::Binary { val, .. }) => hasher.update(val), - // If any Error value is output, echo it back - Ok(v @ Value::Error { .. }) => return Ok(v.into_pipeline_data()), - // Unsupported data - Ok(other) => { - return Ok(Value::error( - ShellError::OnlySupportsThisInputType { - exp_input_type: "string and binary".into(), - wrong_type: other.get_type().to_string(), - dst_span: span, - src_span: other.span(), - }, - span, - ) - .into_pipeline_data()); - } - Err(err) => return Err(err), - }; - } - let digest = hasher.finalize(); - if args.binary { - Ok(Value::binary(digest.to_vec(), span).into_pipeline_data()) - } else { - Ok(Value::string(format!("{digest:x}"), span).into_pipeline_data()) - } + + if let PipelineData::ByteStream(stream, ..) 
= input { + stream.write_to(&mut hasher)?; + let digest = hasher.finalize(); + if binary { + Ok(Value::binary(digest.to_vec(), head).into_pipeline_data()) + } else { + Ok(Value::string(format!("{digest:x}"), head).into_pipeline_data()) } - _ => operate( - action::, - args, - input, - call.head, - engine_state.ctrlc.clone(), - ), + } else { + let args = Arguments { binary, cell_paths }; + operate(action::, args, input, head, engine_state.ctrlc.clone()) } } } diff --git a/crates/nu-command/src/misc/tutor.rs b/crates/nu-command/src/misc/tutor.rs index 8eeec6f393..6b1c43534b 100644 --- a/crates/nu-command/src/misc/tutor.rs +++ b/crates/nu-command/src/misc/tutor.rs @@ -409,15 +409,15 @@ fn display(help: &str, engine_state: &EngineState, stack: &mut Stack, span: Span //TODO: support no-color mode if let Some(highlighter) = engine_state.find_decl(b"nu-highlight", &[]) { let decl = engine_state.get_decl(highlighter); - - if let Ok(output) = decl.run( + let result = decl.run( engine_state, stack, &Call::new(span), Value::string(item, Span::unknown()).into_pipeline_data(), - ) { - let result = output.into_value(Span::unknown()); - match result.coerce_into_string() { + ); + + if let Ok(value) = result.and_then(|data| data.into_value(Span::unknown())) { + match value.coerce_into_string() { Ok(s) => { build.push_str(&s); } diff --git a/crates/nu-command/src/network/http/client.rs b/crates/nu-command/src/network/http/client.rs index 2833f76a97..54f7749627 100644 --- a/crates/nu-command/src/network/http/client.rs +++ b/crates/nu-command/src/network/http/client.rs @@ -5,10 +5,9 @@ use base64::{ Engine, }; use nu_engine::command_prelude::*; -use nu_protocol::{BufferedReader, RawStream}; +use nu_protocol::ByteStream; use std::{ collections::HashMap, - io::BufReader, path::PathBuf, str::FromStr, sync::{ @@ -119,21 +118,11 @@ pub fn response_to_buffer( }; let reader = response.into_reader(); - let buffered_input = BufReader::new(reader); - PipelineData::ExternalStream { - stdout: Some(RawStream::new( - Box::new(BufferedReader::new(buffered_input)), - engine_state.ctrlc.clone(), - span, - buffer_size, - )), - stderr: None, - exit_code: None, - span, - metadata: None, - trim_end_newline: false, - } + PipelineData::ByteStream( + ByteStream::read(reader, span, engine_state.ctrlc.clone()).with_known_size(buffer_size), + None, + ) } pub fn request_add_authorization_header( @@ -529,25 +518,25 @@ fn request_handle_response_content( if flags.full { let response_status = resp.status(); - let request_headers_value = match headers_to_nu(&extract_request_headers(&request), span) { - Ok(headers) => headers.into_value(span), - Err(_) => Value::nothing(span), - }; + let request_headers_value = headers_to_nu(&extract_request_headers(&request), span) + .and_then(|data| data.into_value(span)) + .unwrap_or(Value::nothing(span)); - let response_headers_value = match headers_to_nu(&extract_response_headers(&resp), span) { - Ok(headers) => headers.into_value(span), - Err(_) => Value::nothing(span), - }; + let response_headers_value = headers_to_nu(&extract_response_headers(&resp), span) + .and_then(|data| data.into_value(span)) + .unwrap_or(Value::nothing(span)); let headers = record! { "request" => request_headers_value, "response" => response_headers_value, }; + let body = consume_response_body(resp)?.into_value(span)?; + let full_response = Value::record( record! 
{ "headers" => Value::record(headers, span), - "body" => consume_response_body(resp)?.into_value(span), + "body" => body, "status" => Value::int(response_status as i64, span), }, span, diff --git a/crates/nu-command/src/network/url/parse.rs b/crates/nu-command/src/network/url/parse.rs index 8a80553eca..e71c8d472a 100644 --- a/crates/nu-command/src/network/url/parse.rs +++ b/crates/nu-command/src/network/url/parse.rs @@ -42,7 +42,7 @@ impl Command for SubCommand { call: &Call, input: PipelineData, ) -> Result { - parse(input.into_value(call.head), call.head, engine_state) + parse(input.into_value(call.head)?, call.head, engine_state) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/path/join.rs b/crates/nu-command/src/path/join.rs index eb820606af..19d65b4c46 100644 --- a/crates/nu-command/src/path/join.rs +++ b/crates/nu-command/src/path/join.rs @@ -171,8 +171,8 @@ fn run(call: &Call, args: &Arguments, input: PipelineData) -> Result Ok(PipelineData::Value(handle_value(val, args, head), md)), - PipelineData::ListStream(..) => Ok(PipelineData::Value( - handle_value(input.into_value(head), args, head), + PipelineData::ListStream(stream, ..) => Ok(PipelineData::Value( + handle_value(stream.into_value(), args, head), metadata, )), PipelineData::Empty { .. } => Err(ShellError::PipelineEmpty { dst_span: head }), diff --git a/crates/nu-command/src/progress_bar.rs b/crates/nu-command/src/progress_bar.rs index 17ddeeb64e..db4d4e23b1 100644 --- a/crates/nu-command/src/progress_bar.rs +++ b/crates/nu-command/src/progress_bar.rs @@ -6,8 +6,6 @@ use std::fmt; pub struct NuProgressBar { pub pb: ProgressBar, - bytes_processed: u64, - total_bytes: Option, } impl NuProgressBar { @@ -40,8 +38,6 @@ impl NuProgressBar { NuProgressBar { pb: new_progress_bar, - total_bytes: None, - bytes_processed: 0, } } @@ -57,12 +53,4 @@ impl NuProgressBar { pub fn abandoned_msg(&self, msg: String) { self.pb.abandon_with_message(msg); } - - pub fn clone(&self) -> NuProgressBar { - NuProgressBar { - pb: self.pb.clone(), - bytes_processed: self.bytes_processed, - total_bytes: self.total_bytes, - } - } } diff --git a/crates/nu-command/src/strings/encode_decode/decode.rs b/crates/nu-command/src/strings/encode_decode/decode.rs index 25b8f59ec2..9b13fad202 100644 --- a/crates/nu-command/src/strings/encode_decode/decode.rs +++ b/crates/nu-command/src/strings/encode_decode/decode.rs @@ -57,16 +57,12 @@ documentation link at https://docs.rs/encoding_rs/latest/encoding_rs/#statics"# let encoding: Option> = call.opt(engine_state, stack, 0)?; match input { - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - span: input_span, - .. - } => { - let bytes: Vec = stream.into_bytes()?.item; + PipelineData::ByteStream(stream, ..) 
=> { + let span = stream.span(); + let bytes = stream.into_bytes()?; match encoding { Some(encoding_name) => super::encoding::decode(head, encoding_name, &bytes), - None => super::encoding::detect_encoding_name(head, input_span, &bytes) + None => super::encoding::detect_encoding_name(head, span, &bytes) .map(|encoding| encoding.decode(&bytes).0.into_owned()) .map(|s| Value::string(s, head)), } diff --git a/crates/nu-command/src/strings/encode_decode/encode.rs b/crates/nu-command/src/strings/encode_decode/encode.rs index 98fcc34179..113c0fe548 100644 --- a/crates/nu-command/src/strings/encode_decode/encode.rs +++ b/crates/nu-command/src/strings/encode_decode/encode.rs @@ -81,13 +81,10 @@ documentation link at https://docs.rs/encoding_rs/latest/encoding_rs/#statics"# let ignore_errors = call.has_flag(engine_state, stack, "ignore-errors")?; match input { - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); let s = stream.into_string()?; - super::encoding::encode(head, encoding, &s.item, s.span, ignore_errors) + super::encoding::encode(head, encoding, &s, span, ignore_errors) .map(|val| val.into_pipeline_data()) } PipelineData::Value(v, ..) => { diff --git a/crates/nu-command/src/strings/parse.rs b/crates/nu-command/src/strings/parse.rs index 51067a16a2..bc70d4679c 100644 --- a/crates/nu-command/src/strings/parse.rs +++ b/crates/nu-command/src/strings/parse.rs @@ -208,30 +208,21 @@ fn operate( } }) .into()), - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty), - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - // Collect all `stream` chunks into a single `chunk` to be able to deal with matches that - // extend across chunk boundaries. - // This is a stop-gap solution until the `regex` crate supports streaming or an alternative - // solution is found. - // See https://github.com/nushell/nushell/issues/9795 - let str = stream.into_string()?.item; + PipelineData::ByteStream(stream, ..) => { + if let Some(lines) = stream.lines() { + let iter = ParseIter { + captures: VecDeque::new(), + regex, + columns, + iter: lines, + span: head, + ctrlc, + }; - // let iter = stream.lines(); - - let iter = ParseIter { - captures: VecDeque::new(), - regex, - columns, - iter: std::iter::once(Ok(str)), - span: head, - ctrlc, - }; - - Ok(ListStream::new(iter, head, None).into()) + Ok(ListStream::new(iter, head, None).into()) + } else { + Ok(PipelineData::Empty) + } } } } diff --git a/crates/nu-command/src/system/complete.rs b/crates/nu-command/src/system/complete.rs index c622c86f3c..409cef1f27 100644 --- a/crates/nu-command/src/system/complete.rs +++ b/crates/nu-command/src/system/complete.rs @@ -1,6 +1,5 @@ use nu_engine::command_prelude::*; use nu_protocol::OutDest; -use std::thread; #[derive(Clone)] pub struct Complete; @@ -31,78 +30,53 @@ impl Command for Complete { call: &Call, input: PipelineData, ) -> Result { + let head = call.head; match input { - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - .. - } => { - let mut record = Record::new(); - - // use a thread to receive stderr message. - // Or we may get a deadlock if child process sends out too much bytes to stdout. - // - // For example: in normal linux system, stdout pipe's limit is 65535 bytes. 
- // if child process sends out 65536 bytes, the process will be hanged because no consumer - // consumes the first 65535 bytes - // So we need a thread to receive stderr message, then the current thread can continue to consume - // stdout messages. - let stderr_handler = stderr - .map(|stderr| { - let stderr_span = stderr.span; - thread::Builder::new() - .name("stderr consumer".to_string()) - .spawn(move || { - let stderr = stderr.into_bytes()?; - if let Ok(st) = String::from_utf8(stderr.item.clone()) { - Ok::<_, ShellError>(Value::string(st, stderr.span)) - } else { - Ok::<_, ShellError>(Value::binary(stderr.item, stderr.span)) - } - }) - .map(|handle| (handle, stderr_span)) - .err_span(call.head) - }) - .transpose()?; - - if let Some(stdout) = stdout { - let stdout = stdout.into_bytes()?; - record.push( - "stdout", - if let Ok(st) = String::from_utf8(stdout.item.clone()) { - Value::string(st, stdout.span) - } else { - Value::binary(stdout.item, stdout.span) - }, - ) - } - - if let Some((handler, stderr_span)) = stderr_handler { - let res = handler.join().map_err(|err| ShellError::ExternalCommand { - label: "Fail to receive external commands stderr message".to_string(), - help: format!("{err:?}"), - span: stderr_span, - })??; - record.push("stderr", res) + PipelineData::ByteStream(stream, ..) => { + let Ok(child) = stream.into_child() else { + return Err(ShellError::GenericError { + error: "Complete only works with external commands".into(), + msg: "complete only works on external commands".into(), + span: Some(call.head), + help: None, + inner: vec![], + }); }; - if let Some(exit_code) = exit_code { - let mut v: Vec<_> = exit_code.into_iter().collect(); + let output = child.wait_with_output()?; + let exit_code = output.exit_status.code(); + let mut record = Record::new(); - if let Some(v) = v.pop() { - record.push("exit_code", v); - } + if let Some(stdout) = output.stdout { + record.push( + "stdout", + match String::from_utf8(stdout) { + Ok(str) => Value::string(str, head), + Err(err) => Value::binary(err.into_bytes(), head), + }, + ); } + if let Some(stderr) = output.stderr { + record.push( + "stderr", + match String::from_utf8(stderr) { + Ok(str) => Value::string(str, head), + Err(err) => Value::binary(err.into_bytes(), head), + }, + ); + } + + record.push("exit_code", Value::int(exit_code.into(), head)); + Ok(Value::record(record, call.head).into_pipeline_data()) } // bubble up errors from the previous command PipelineData::Value(Value::Error { error, .. }, _) => Err(*error), _ => Err(ShellError::GenericError { - error: "Complete only works with external streams".into(), - msg: "complete only works on external streams".into(), - span: Some(call.head), + error: "Complete only works with external commands".into(), + msg: "complete only works on external commands".into(), + span: Some(head), help: None, inner: vec![], }), diff --git a/crates/nu-command/src/system/nu_check.rs b/crates/nu-command/src/system/nu_check.rs index 260a1c7a59..f9e0879c00 100644 --- a/crates/nu-command/src/system/nu_check.rs +++ b/crates/nu-command/src/system/nu_check.rs @@ -69,18 +69,8 @@ impl Command for NuCheck { parse_script(&mut working_set, None, &contents, is_debug, call.head) } } - PipelineData::ExternalStream { - stdout: Some(stream), - .. - } => { - let mut contents = vec![]; - let raw_stream: Vec<_> = stream.stream.collect(); - for r in raw_stream { - match r { - Ok(v) => contents.extend(v), - Err(error) => return Err(error), - }; - } + PipelineData::ByteStream(stream, ..) 
=> { + let contents = stream.into_bytes()?; if as_module { parse_module(&mut working_set, None, &contents, is_debug, call.head) @@ -160,7 +150,7 @@ impl Command for NuCheck { result: None, }, Example { - description: "Parse an external stream as script by showing error message", + description: "Parse a byte stream as script by showing error message", example: "open foo.nu | nu-check --debug script.nu", result: None, }, diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index e73bd4ab50..2941d80de3 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -1,16 +1,16 @@ use nu_cmd_base::hook::eval_hook; use nu_engine::{command_prelude::*, env_to_strings, get_eval_expression}; -use nu_protocol::{ast::Expr, did_you_mean, ListStream, NuGlob, OutDest, RawStream}; +use nu_protocol::{ast::Expr, did_you_mean, process::ChildProcess, ByteStream, NuGlob, OutDest}; use nu_system::ForegroundChild; use nu_utils::IgnoreCaseExt; use os_pipe::PipeReader; use pathdiff::diff_paths; use std::{ collections::HashMap, - io::{BufRead, BufReader, Read, Write}, + io::Write, path::{Path, PathBuf}, process::{Command as CommandSys, Stdio}, - sync::{mpsc, Arc}, + sync::Arc, thread, }; @@ -163,89 +163,124 @@ impl ExternalCommand { ) -> Result { let head = self.name.span; - #[allow(unused_mut)] - let (cmd, mut reader) = self.create_process(&input, false, head)?; - - #[cfg(all(not(unix), not(windows)))] // are there any systems like this? - let child = ForegroundChild::spawn(cmd); - #[cfg(windows)] - let child = match ForegroundChild::spawn(cmd) { - Ok(child) => Ok(child), - Err(err) => { - // Running external commands on Windows has 2 points of complication: - // 1. Some common Windows commands are actually built in to cmd.exe, not executables in their own right. - // 2. We need to let users run batch scripts etc. (.bat, .cmd) without typing their extension + let (child, reader, input) = { + // We may need to run `create_process` again, so we have to clone the underlying + // file or pipe in `input` here first. + let (input_consumed, stdin) = match &input { + PipelineData::ByteStream(stream, ..) => match stream.source() { + nu_protocol::ByteStreamSource::Read(_) => (false, Stdio::piped()), + nu_protocol::ByteStreamSource::File(file) => { + (true, file.try_clone().err_span(head)?.into()) + } + nu_protocol::ByteStreamSource::Child(child) => { + if let Some(nu_protocol::process::ChildPipe::Pipe(pipe)) = &child.stdout { + (true, pipe.try_clone().err_span(head)?.into()) + } else { + (false, Stdio::piped()) + } + } + }, + PipelineData::Empty => (false, Stdio::inherit()), + _ => (false, Stdio::piped()), + }; - // To support these situations, we have a fallback path that gets run if a command - // fails to be run as a normal executable: - // 1. "shell out" to cmd.exe if the command is a known cmd.exe internal command - // 2. Otherwise, use `which-rs` to look for batch files etc. then run those in cmd.exe + let mut input = input; + let (cmd, mut reader) = self.create_process(stdin, false, head)?; + let child = match ForegroundChild::spawn(cmd) { + Ok(child) => { + if input_consumed { + input = PipelineData::Empty; + } + Ok(child) + } + Err(err) => { + // Running external commands on Windows has 2 points of complication: + // 1. Some common Windows commands are actually built in to cmd.exe, not executables in their own right. + // 2. We need to let users run batch scripts etc. 
(.bat, .cmd) without typing their extension - // set the default value, maybe we'll override it later - let mut child = Err(err); + // To support these situations, we have a fallback path that gets run if a command + // fails to be run as a normal executable: + // 1. "shell out" to cmd.exe if the command is a known cmd.exe internal command + // 2. Otherwise, use `which-rs` to look for batch files etc. then run those in cmd.exe - // This has the full list of cmd.exe "internal" commands: https://ss64.com/nt/syntax-internal.html - // I (Reilly) went through the full list and whittled it down to ones that are potentially useful: - const CMD_INTERNAL_COMMANDS: [&str; 9] = [ - "ASSOC", "CLS", "ECHO", "FTYPE", "MKLINK", "PAUSE", "START", "VER", "VOL", - ]; - let command_name = &self.name.item; - let looks_like_cmd_internal = CMD_INTERNAL_COMMANDS - .iter() - .any(|&cmd| command_name.eq_ignore_ascii_case(cmd)); + // set the default value, maybe we'll override it later + let mut child = Err(err); - if looks_like_cmd_internal { - let (cmd, new_reader) = self.create_process(&input, true, head)?; - reader = new_reader; - child = ForegroundChild::spawn(cmd); - } else { - #[cfg(feature = "which-support")] - { - // maybe it's a batch file (foo.cmd) and the user typed `foo`. Try to find it with `which-rs` - // TODO: clean this up with an if-let chain once those are stable - if let Ok(path) = - nu_engine::env::path_str(engine_state, stack, self.name.span) + // This has the full list of cmd.exe "internal" commands: https://ss64.com/nt/syntax-internal.html + // I (Reilly) went through the full list and whittled it down to ones that are potentially useful: + const CMD_INTERNAL_COMMANDS: [&str; 9] = [ + "ASSOC", "CLS", "ECHO", "FTYPE", "MKLINK", "PAUSE", "START", "VER", "VOL", + ]; + let command_name = &self.name.item; + let looks_like_cmd_internal = CMD_INTERNAL_COMMANDS + .iter() + .any(|&cmd| command_name.eq_ignore_ascii_case(cmd)); + + let (data, stdin) = extract_stdio(input); + input = data; + + if looks_like_cmd_internal { + let (cmd, new_reader) = self.create_process(stdin, true, head)?; + reader = new_reader; + child = ForegroundChild::spawn(cmd); + } else { + #[cfg(feature = "which-support")] { - if let Some(cwd) = self.env_vars.get("PWD") { - // append cwd to PATH so `which-rs` looks in the cwd too. - // this approximates what cmd.exe does. - let path_with_cwd = format!("{};{}", cwd, path); - if let Ok(which_path) = - which::which_in(&self.name.item, Some(path_with_cwd), cwd) - { - if let Some(file_name) = which_path.file_name() { - if !file_name.to_string_lossy().eq_ignore_case(command_name) - { - // which-rs found an executable file with a slightly different name - // than the one the user tried. Let's try running it - let mut new_command = self.clone(); - new_command.name = Spanned { - item: file_name.to_string_lossy().to_string(), - span: self.name.span, - }; - let (cmd, new_reader) = - new_command.create_process(&input, true, head)?; - reader = new_reader; - child = ForegroundChild::spawn(cmd); + // maybe it's a batch file (foo.cmd) and the user typed `foo`. Try to find it with `which-rs` + // TODO: clean this up with an if-let chain once those are stable + if let Ok(path) = + nu_engine::env::path_str(engine_state, stack, self.name.span) + { + if let Some(cwd) = self.env_vars.get("PWD") { + // append cwd to PATH so `which-rs` looks in the cwd too. + // this approximates what cmd.exe does. 
+ let path_with_cwd = format!("{};{}", cwd, path); + if let Ok(which_path) = + which::which_in(&self.name.item, Some(path_with_cwd), cwd) + { + if let Some(file_name) = which_path.file_name() { + if !file_name + .to_string_lossy() + .eq_ignore_case(command_name) + { + // which-rs found an executable file with a slightly different name + // than the one the user tried. Let's try running it + let mut new_command = self.clone(); + new_command.name = Spanned { + item: file_name.to_string_lossy().to_string(), + span: self.name.span, + }; + let (cmd, new_reader) = new_command + .create_process(stdin, true, head)?; + reader = new_reader; + child = ForegroundChild::spawn(cmd); + } } } } } } } - } - child - } + child + } + }; + + (child, reader, input) }; #[cfg(unix)] - let child = ForegroundChild::spawn( - cmd, - engine_state.is_interactive, - &engine_state.pipeline_externals_state, - ); + let (child, reader, input) = { + let (input, stdin) = extract_stdio(input); + let (cmd, reader) = self.create_process(stdin, false, head)?; + let child = ForegroundChild::spawn( + cmd, + engine_state.is_interactive, + &engine_state.pipeline_externals_state, + ); + (child, reader, input) + }; match child { Err(err) => { @@ -381,9 +416,8 @@ impl ExternalCommand { .name("external stdin worker".to_string()) .spawn(move || { let input = match input { - input @ PipelineData::Value(Value::Binary { .. }, ..) => { - Ok(input) - } + input @ PipelineData::ByteStream(..) => input, + input @ PipelineData::Value(Value::Binary { .. }, ..) => input, input => { let stack = &mut stack.start_capture(); // Attempt to render the input as a table before piping it to the external. @@ -397,143 +431,39 @@ impl ExternalCommand { stack, &Call::new(head), input, - ) + )? } }; - if let Ok(input) = input { + if let PipelineData::ByteStream(stream, ..) = input { + stream.write_to(&mut stdin_write)?; + } else { for value in input.into_iter() { - let buf = match value { - Value::String { val, .. } => val.into_bytes(), - Value::Binary { val, .. } => val, - _ => return Err(()), - }; - if stdin_write.write(&buf).is_err() { - return Ok(()); - } + let buf = value.coerce_into_binary()?; + stdin_write.write_all(&buf)?; } } - Ok(()) + Ok::<_, ShellError>(()) }) .err_span(head)?; } } - #[cfg(unix)] - let commandname = self.name.item.clone(); - let span = self.name.span; - let (exit_code_tx, exit_code_rx) = mpsc::channel(); + let child = + ChildProcess::new(child, reader, matches!(self.err, OutDest::Pipe), head)?; - let (stdout, stderr) = if let Some(combined) = reader { - ( - Some(RawStream::new( - Box::new(ByteLines::new(combined)), - engine_state.ctrlc.clone(), - head, - None, - )), - None, - ) - } else { - let stdout = child.as_mut().stdout.take().map(|out| { - RawStream::new( - Box::new(ByteLines::new(out)), - engine_state.ctrlc.clone(), - head, - None, - ) - }); - - let stderr = child.as_mut().stderr.take().map(|err| { - RawStream::new( - Box::new(ByteLines::new(err)), - engine_state.ctrlc.clone(), - head, - None, - ) - }); - - if matches!(self.err, OutDest::Pipe) { - (stderr, stdout) - } else { - (stdout, stderr) - } - }; - - // Create a thread to wait for an exit code. 
- thread::Builder::new() - .name("exit code waiter".into()) - .spawn(move || match child.as_mut().wait() { - Err(err) => Err(ShellError::ExternalCommand { - label: "External command exited with error".into(), - help: err.to_string(), - span, - }), - Ok(x) => { - #[cfg(unix)] - { - use nix::sys::signal::Signal; - use nu_ansi_term::{Color, Style}; - use std::os::unix::process::ExitStatusExt; - - if x.core_dumped() { - let cause = x - .signal() - .and_then(|sig| { - Signal::try_from(sig).ok().map(Signal::as_str) - }) - .unwrap_or("Something went wrong"); - - let style = Style::new().bold().on(Color::Red); - let message = format!( - "{cause}: child process '{commandname}' core dumped" - ); - eprintln!("{}", style.paint(&message)); - let _ = exit_code_tx.send(Value::error( - ShellError::ExternalCommand { - label: "core dumped".into(), - help: message, - span: head, - }, - head, - )); - return Ok(()); - } - } - if let Some(code) = x.code() { - let _ = exit_code_tx.send(Value::int(code as i64, head)); - } else if x.success() { - let _ = exit_code_tx.send(Value::int(0, head)); - } else { - let _ = exit_code_tx.send(Value::int(-1, head)); - } - Ok(()) - } - }) - .err_span(head)?; - - let exit_code = Some(ListStream::new( - ValueReceiver::new(exit_code_rx), - head, + Ok(PipelineData::ByteStream( + ByteStream::child(child, head), None, - )); - - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span: head, - metadata: None, - trim_end_newline: true, - }) + )) } } } pub fn create_process( &self, - input: &PipelineData, + stdin: Stdio, use_cmd: bool, span: Span, ) -> Result<(CommandSys, Option), ShellError> { @@ -578,11 +508,7 @@ impl ExternalCommand { None }; - // If there is an input from the pipeline. The stdin from the process - // is piped so it can be used to send the input information - if !input.is_nothing() { - process.stdin(Stdio::piped()); - } + process.stdin(stdin); Ok((process, reader)) } @@ -764,51 +690,14 @@ fn remove_quotes(input: String) -> String { } } -struct ByteLines(BufReader); - -impl ByteLines { - fn new(read: R) -> Self { - Self(BufReader::new(read)) - } -} - -impl Iterator for ByteLines { - type Item = Result, ShellError>; - - fn next(&mut self) -> Option { - let mut buf = Vec::new(); - // `read_until` will never stop reading unless `\n` or EOF is encountered, - // so let's limit the number of bytes using `take` as the Rust docs suggest. 
- let capacity = self.0.capacity() as u64; - let mut reader = (&mut self.0).take(capacity); - match reader.read_until(b'\n', &mut buf) { - Ok(0) => None, - Ok(_) => Some(Ok(buf)), - Err(e) => Some(Err(e.into())), - } - } -} - -// Receiver used for the ListStream -// It implements iterator so it can be used as a ListStream -struct ValueReceiver { - rx: mpsc::Receiver, -} - -impl ValueReceiver { - pub fn new(rx: mpsc::Receiver) -> Self { - Self { rx } - } -} - -impl Iterator for ValueReceiver { - type Item = Value; - - fn next(&mut self) -> Option { - match self.rx.recv() { - Ok(v) => Some(v), - Err(_) => None, - } +fn extract_stdio(pipeline: PipelineData) -> (PipelineData, Stdio) { + match pipeline { + PipelineData::ByteStream(stream, metadata) => match stream.into_stdio() { + Ok(pipe) => (PipelineData::Empty, pipe), + Err(stream) => (PipelineData::ByteStream(stream, metadata), Stdio::piped()), + }, + PipelineData::Empty => (PipelineData::Empty, Stdio::inherit()), + data => (data, Stdio::piped()), } } diff --git a/crates/nu-command/src/viewers/table.rs b/crates/nu-command/src/viewers/table.rs index f4df2e03bc..26b8c921c5 100644 --- a/crates/nu-command/src/viewers/table.rs +++ b/crates/nu-command/src/viewers/table.rs @@ -6,7 +6,7 @@ use lscolors::{LsColors, Style}; use nu_color_config::{color_from_hex, StyleComputer, TextStyle}; use nu_engine::{command_prelude::*, env::get_config, env_to_string}; use nu_protocol::{ - Config, DataSource, ListStream, PipelineMetadata, RawStream, TableMode, ValueIterator, + ByteStream, Config, DataSource, ListStream, PipelineMetadata, TableMode, ValueIterator, }; use nu_table::{ common::create_nu_table_config, CollapsedTable, ExpandedTable, JustTable, NuTable, NuTableCell, @@ -14,8 +14,12 @@ use nu_table::{ }; use nu_utils::get_ls_colors; use std::{ - collections::VecDeque, io::IsTerminal, path::PathBuf, str::FromStr, sync::atomic::AtomicBool, - sync::Arc, time::Instant, + collections::VecDeque, + io::{Cursor, IsTerminal}, + path::PathBuf, + str::FromStr, + sync::{atomic::AtomicBool, Arc}, + time::Instant, }; use terminal_size::{Height, Width}; use url::Url; @@ -360,25 +364,16 @@ fn handle_table_command( ) -> Result { let span = input.data.span().unwrap_or(input.call.head); match input.data { - PipelineData::ExternalStream { .. } => Ok(input.data), + PipelineData::ByteStream(..) => Ok(input.data), PipelineData::Value(Value::Binary { val, .. }, ..) => { - let bytes = format!("{}\n", nu_pretty_hex::pretty_hex(&val)).into_bytes(); + let bytes = { + let mut str = nu_pretty_hex::pretty_hex(&val); + str.push('\n'); + str.into_bytes() + }; let ctrlc = input.engine_state.ctrlc.clone(); - let stream = RawStream::new( - Box::new([Ok(bytes)].into_iter()), - ctrlc, - input.call.head, - None, - ); - - Ok(PipelineData::ExternalStream { - stdout: Some(stream), - stderr: None, - exit_code: None, - span: input.call.head, - metadata: None, - trim_end_newline: false, - }) + let stream = ByteStream::read(Cursor::new(bytes), input.call.head, ctrlc); + Ok(PipelineData::ByteStream(stream, None)) } // None of these two receive a StyleComputer because handle_row_stream() can produce it by itself using engine_state and stack. PipelineData::Value(Value::List { vals, .. 
}, metadata) => { @@ -613,16 +608,8 @@ fn handle_row_stream( ctrlc.clone(), cfg, ); - let stream = RawStream::new(Box::new(paginator), ctrlc, input.call.head, None); - - Ok(PipelineData::ExternalStream { - stdout: Some(stream), - stderr: None, - exit_code: None, - span: input.call.head, - metadata: None, - trim_end_newline: false, - }) + let stream = ByteStream::from_result_iter(paginator, input.call.head, None); + Ok(PipelineData::ByteStream(stream, None)) } fn make_clickable_link( diff --git a/crates/nu-command/tests/format_conversions/csv.rs b/crates/nu-command/tests/format_conversions/csv.rs index 5915b3c4d4..a9be76d5c3 100644 --- a/crates/nu-command/tests/format_conversions/csv.rs +++ b/crates/nu-command/tests/format_conversions/csv.rs @@ -183,6 +183,7 @@ fn from_csv_text_with_tab_separator_to_table() { } #[test] +#[ignore = "csv crate has a bug when the last line is a comment: https://github.com/BurntSushi/rust-csv/issues/363"] fn from_csv_text_with_comments_to_table() { Playground::setup("filter_from_csv_test_5", |dirs, sandbox| { sandbox.with_files(&[FileWithContentToBeTrimmed( diff --git a/crates/nu-command/tests/format_conversions/tsv.rs b/crates/nu-command/tests/format_conversions/tsv.rs index 9627d0d0be..be57c60242 100644 --- a/crates/nu-command/tests/format_conversions/tsv.rs +++ b/crates/nu-command/tests/format_conversions/tsv.rs @@ -106,6 +106,7 @@ fn from_tsv_text_to_table() { } #[test] +#[ignore = "csv crate has a bug when the last line is a comment: https://github.com/BurntSushi/rust-csv/issues/363"] fn from_tsv_text_with_comments_to_table() { Playground::setup("filter_from_tsv_test_2", |dirs, sandbox| { sandbox.with_files(&[FileWithContentToBeTrimmed( diff --git a/crates/nu-engine/src/documentation.rs b/crates/nu-engine/src/documentation.rs index 2e966a312f..62e68eaa6c 100644 --- a/crates/nu-engine/src/documentation.rs +++ b/crates/nu-engine/src/documentation.rs @@ -53,7 +53,7 @@ fn nu_highlight_string(code_string: &str, engine_state: &EngineState, stack: &mu Value::string(code_string, Span::unknown()).into_pipeline_data(), ) { let result = output.into_value(Span::unknown()); - if let Ok(s) = result.coerce_into_string() { + if let Ok(s) = result.and_then(Value::coerce_into_string) { return s; // successfully highlighted string } } @@ -280,7 +280,7 @@ fn get_documentation( ) { Ok(output) => { let result = output.into_value(Span::unknown()); - match result.coerce_into_string() { + match result.and_then(Value::coerce_into_string) { Ok(s) => { let _ = write!(long_desc, "\n > {s}\n"); } diff --git a/crates/nu-engine/src/env.rs b/crates/nu-engine/src/env.rs index 44692dd131..048d9bfb99 100644 --- a/crates/nu-engine/src/env.rs +++ b/crates/nu-engine/src/env.rs @@ -350,14 +350,15 @@ fn get_converted_value( .and_then(|record| record.get(direction)); if let Some(conversion) = conversion { - match conversion.as_closure() { - Ok(closure) => ClosureEvalOnce::new(engine_state, stack, closure.clone()) - .debug(false) - .run_with_value(orig_val.clone()) - .map(|data| ConversionResult::Ok(data.into_value(orig_val.span()))) - .unwrap_or_else(ConversionResult::ConversionError), - Err(e) => ConversionResult::ConversionError(e), - } + conversion + .as_closure() + .and_then(|closure| { + ClosureEvalOnce::new(engine_state, stack, closure.clone()) + .debug(false) + .run_with_value(orig_val.clone()) + }) + .and_then(|data| data.into_value(orig_val.span())) + .map_or_else(ConversionResult::ConversionError, ConversionResult::Ok) } else { ConversionResult::CellPathError } diff --git 
a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index b8d806b708..0bc0c3727c 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -9,8 +9,8 @@ use nu_protocol::{ debugger::DebugContext, engine::{Closure, EngineState, Redirection, Stack}, eval_base::Eval, - Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span, Spanned, Type, - Value, VarId, ENV_VARIABLE_ID, + ByteStreamSource, Config, FromValue, IntoPipelineData, OutDest, PipelineData, ShellError, Span, + Spanned, Type, Value, VarId, ENV_VARIABLE_ID, }; use nu_utils::IgnoreCaseExt; use std::{borrow::Cow, fs::OpenOptions, path::PathBuf}; @@ -209,7 +209,6 @@ pub fn redirect_env(engine_state: &EngineState, caller_stack: &mut Stack, callee } } -#[allow(clippy::too_many_arguments)] fn eval_external( engine_state: &EngineState, stack: &mut Stack, @@ -284,7 +283,7 @@ pub fn eval_expression_with_input( let stack = &mut stack.start_capture(); // FIXME: protect this collect with ctrl-c input = eval_subexpression::(engine_state, stack, block, input)? - .into_value(*span) + .into_value(*span)? .follow_cell_path(&full_cell_path.tail, false)? .into_pipeline_data() } else { @@ -301,7 +300,7 @@ pub fn eval_expression_with_input( } }; - // If input is PipelineData::ExternalStream, + // If input an external command, // then `might_consume_external_result` will consume `stderr` if `stdout` is `None`. // This should not happen if the user wants to capture stderr. if !matches!(stack.stdout(), OutDest::Pipe | OutDest::Capture) @@ -309,15 +308,10 @@ pub fn eval_expression_with_input( { Ok((input, false)) } else { - Ok(might_consume_external_result(input)) + input.check_external_failed() } } -// Try to catch and detect if external command runs to failed. -fn might_consume_external_result(input: PipelineData) -> (PipelineData, bool) { - input.check_external_failed() -} - fn eval_redirection( engine_state: &EngineState, stack: &mut Stack, @@ -410,10 +404,17 @@ fn eval_element_with_input_inner( element: &PipelineElement, input: PipelineData, ) -> Result<(PipelineData, bool), ShellError> { - let (data, ok) = eval_expression_with_input::(engine_state, stack, &element.expr, input)?; + let (data, failed) = + eval_expression_with_input::(engine_state, stack, &element.expr, input)?; - if !matches!(data, PipelineData::ExternalStream { .. }) { - if let Some(redirection) = element.redirection.as_ref() { + if let Some(redirection) = element.redirection.as_ref() { + let is_external = if let PipelineData::ByteStream(stream, ..) = &data { + matches!(stream.source(), ByteStreamSource::Child(..)) + } else { + false + }; + + if !is_external { match redirection { &PipelineRedirection::Single { source: RedirectionSource::Stderr, @@ -424,8 +425,8 @@ fn eval_element_with_input_inner( .. 
} => { return Err(ShellError::GenericError { - error: "`e>|` only works with external streams".into(), - msg: "`e>|` only works on external streams".into(), + error: "`e>|` only works on external commands".into(), + msg: "`e>|` only works on external commands".into(), span: Some(span), help: None, inner: vec![], @@ -436,8 +437,8 @@ fn eval_element_with_input_inner( target: RedirectionTarget::Pipe { span }, } => { return Err(ShellError::GenericError { - error: "`o+e>|` only works with external streams".into(), - msg: "`o+e>|` only works on external streams".into(), + error: "`o+e>|` only works on external commands".into(), + msg: "`o+e>|` only works on external commands".into(), span: Some(span), help: None, inner: vec![], @@ -448,15 +449,33 @@ fn eval_element_with_input_inner( } } - let data = if matches!(stack.pipe_stdout(), Some(OutDest::File(_))) - && !matches!(stack.pipe_stderr(), Some(OutDest::Pipe)) - { - data.write_to_out_dests(engine_state, stack)? - } else { - data + let has_stdout_file = matches!(stack.pipe_stdout(), Some(OutDest::File(_))); + + let data = match &data { + PipelineData::Value(..) | PipelineData::ListStream(..) => { + if has_stdout_file { + data.write_to_out_dests(engine_state, stack)?; + PipelineData::Empty + } else { + data + } + } + PipelineData::ByteStream(stream, ..) => { + let write = match stream.source() { + ByteStreamSource::Read(_) | ByteStreamSource::File(_) => has_stdout_file, + ByteStreamSource::Child(_) => false, + }; + if write { + data.write_to_out_dests(engine_state, stack)?; + PipelineData::Empty + } else { + data + } + } + PipelineData::Empty => PipelineData::Empty, }; - Ok((data, ok)) + Ok((data, failed)) } fn eval_element_with_input( @@ -466,12 +485,18 @@ fn eval_element_with_input( input: PipelineData, ) -> Result<(PipelineData, bool), ShellError> { D::enter_element(engine_state, element); - - let result = eval_element_with_input_inner::(engine_state, stack, element, input); - - D::leave_element(engine_state, element, &result); - - result + match eval_element_with_input_inner::(engine_state, stack, element, input) { + Ok((data, failed)) => { + let res = Ok(data); + D::leave_element(engine_state, element, &res); + res.map(|data| (data, failed)) + } + Err(err) => { + let res = Err(err); + D::leave_element(engine_state, element, &res); + res.map(|data| (data, false)) + } + } } pub fn eval_block_with_early_return( @@ -555,17 +580,20 @@ pub fn eval_block( } input = PipelineData::Empty; match output { - stream @ PipelineData::ExternalStream { .. } => { - let exit_code = stream.drain_with_exit_code()?; - stack.add_env_var( - "LAST_EXIT_CODE".into(), - Value::int(exit_code, last.expr.span), - ); - if exit_code != 0 { - break; + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + let status = stream.drain()?; + if let Some(status) = status { + stack.add_env_var( + "LAST_EXIT_CODE".into(), + Value::int(status.code().into(), span), + ); + if status.code() != 0 { + break; + } } } - PipelineData::ListStream(stream, _) => { + PipelineData::ListStream(stream, ..) => { stream.drain()?; } PipelineData::Value(..) 
| PipelineData::Empty => {} @@ -684,7 +712,7 @@ impl Eval for EvalRuntime { _: Span, ) -> Result { // FIXME: protect this collect with ctrl-c - Ok(eval_call::(engine_state, stack, call, PipelineData::empty())?.into_value(call.head)) + eval_call::(engine_state, stack, call, PipelineData::empty())?.into_value(call.head) } fn eval_external_call( @@ -696,7 +724,7 @@ impl Eval for EvalRuntime { ) -> Result { let span = head.span; // FIXME: protect this collect with ctrl-c - Ok(eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span)) + eval_external(engine_state, stack, head, args, PipelineData::empty())?.into_value(span) } fn eval_subexpression( @@ -706,12 +734,8 @@ impl Eval for EvalRuntime { span: Span, ) -> Result { let block = engine_state.get_block(block_id); - // FIXME: protect this collect with ctrl-c - Ok( - eval_subexpression::(engine_state, stack, block, PipelineData::empty())? - .into_value(span), - ) + eval_subexpression::(engine_state, stack, block, PipelineData::empty())?.into_value(span) } fn regex_match( diff --git a/crates/nu-explore/src/nu_common/value.rs b/crates/nu-explore/src/nu_common/value.rs index 17b277cac5..8aa71a28bf 100644 --- a/crates/nu-explore/src/nu_common/value.rs +++ b/crates/nu-explore/src/nu_common/value.rs @@ -1,7 +1,7 @@ use super::NuSpan; use anyhow::Result; use nu_engine::get_columns; -use nu_protocol::{record, ListStream, PipelineData, PipelineMetadata, RawStream, Value}; +use nu_protocol::{record, ByteStream, ListStream, PipelineData, PipelineMetadata, Value}; use std::collections::HashMap; pub fn collect_pipeline(input: PipelineData) -> Result<(Vec, Vec>)> { @@ -9,16 +9,7 @@ pub fn collect_pipeline(input: PipelineData) -> Result<(Vec, Vec Ok((vec![], vec![])), PipelineData::Value(value, ..) => collect_input(value), PipelineData::ListStream(stream, ..) => Ok(collect_list_stream(stream)), - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - metadata, - span, - .. 
- } => Ok(collect_external_stream( - stdout, stderr, exit_code, metadata, span, - )), + PipelineData::ByteStream(stream, metadata) => Ok(collect_byte_stream(stream, metadata)), } } @@ -42,49 +33,60 @@ fn collect_list_stream(stream: ListStream) -> (Vec, Vec>) { (cols, data) } -fn collect_external_stream( - stdout: Option, - stderr: Option, - exit_code: Option, +fn collect_byte_stream( + stream: ByteStream, metadata: Option, - span: NuSpan, ) -> (Vec, Vec>) { + let span = stream.span(); + let mut columns = vec![]; let mut data = vec![]; - if let Some(stdout) = stdout { - let value = stdout.into_string().map_or_else( - |error| Value::error(error, span), - |string| Value::string(string.item, span), - ); - columns.push(String::from("stdout")); - data.push(value); - } - if let Some(stderr) = stderr { - let value = stderr.into_string().map_or_else( - |error| Value::error(error, span), - |string| Value::string(string.item, span), - ); + match stream.into_child() { + Ok(child) => match child.wait_with_output() { + Ok(output) => { + let exit_code = output.exit_status.code(); + if let Some(stdout) = output.stdout { + columns.push(String::from("stdout")); + data.push(string_or_binary(stdout, span)); + } + if let Some(stderr) = output.stderr { + columns.push(String::from("stderr")); + data.push(string_or_binary(stderr, span)); + } + columns.push(String::from("exit_code")); + data.push(Value::int(exit_code.into(), span)); + } + Err(err) => { + columns.push("".into()); + data.push(Value::error(err, span)); + } + }, + Err(stream) => { + let value = stream + .into_value() + .unwrap_or_else(|err| Value::error(err, span)); - columns.push(String::from("stderr")); - data.push(value); + columns.push("".into()); + data.push(value); + } } - if let Some(exit_code) = exit_code { - let list = exit_code.into_iter().collect::>(); - let val = Value::list(list, span); - columns.push(String::from("exit_code")); - data.push(val); - } if metadata.is_some() { let val = Value::record(record! { "data_source" => Value::string("ls", span) }, span); - columns.push(String::from("metadata")); data.push(val); } (columns, vec![data]) } +fn string_or_binary(bytes: Vec, span: NuSpan) -> Value { + match String::from_utf8(bytes) { + Ok(str) => Value::string(str, span), + Err(err) => Value::binary(err.into_bytes(), span), + } +} + /// Try to build column names and a table grid. pub fn collect_input(value: Value) -> Result<(Vec, Vec>)> { let span = value.span(); diff --git a/crates/nu-plugin-core/src/interface/mod.rs b/crates/nu-plugin-core/src/interface/mod.rs index 3fb86aee36..b4a2bc9a25 100644 --- a/crates/nu-plugin-core/src/interface/mod.rs +++ b/crates/nu-plugin-core/src/interface/mod.rs @@ -1,15 +1,10 @@ //! Implements the stream multiplexing interface for both the plugin side and the engine side. 
-use nu_plugin_protocol::{ - ExternalStreamInfo, ListStreamInfo, PipelineDataHeader, RawStreamInfo, StreamMessage, -}; -use nu_protocol::{ListStream, PipelineData, RawStream, ShellError}; +use nu_plugin_protocol::{ByteStreamInfo, ListStreamInfo, PipelineDataHeader, StreamMessage}; +use nu_protocol::{ByteStream, IntoSpanned, ListStream, PipelineData, Reader, ShellError}; use std::{ - io::Write, - sync::{ - atomic::{AtomicBool, Ordering::Relaxed}, - Arc, Mutex, - }, + io::{Read, Write}, + sync::{atomic::AtomicBool, Arc, Mutex}, thread, }; @@ -185,31 +180,10 @@ pub trait InterfaceManager { let reader = handle.read_stream(info.id, self.get_interface())?; ListStream::new(reader, info.span, ctrlc.cloned()).into() } - PipelineDataHeader::ExternalStream(info) => { + PipelineDataHeader::ByteStream(info) => { let handle = self.stream_manager().get_handle(); - let span = info.span; - let new_raw_stream = |raw_info: RawStreamInfo| { - let reader = handle.read_stream(raw_info.id, self.get_interface())?; - let mut stream = - RawStream::new(Box::new(reader), ctrlc.cloned(), span, raw_info.known_size); - stream.is_binary = raw_info.is_binary; - Ok::<_, ShellError>(stream) - }; - PipelineData::ExternalStream { - stdout: info.stdout.map(new_raw_stream).transpose()?, - stderr: info.stderr.map(new_raw_stream).transpose()?, - exit_code: info - .exit_code - .map(|list_info| { - handle - .read_stream(list_info.id, self.get_interface()) - .map(|reader| ListStream::new(reader, info.span, ctrlc.cloned())) - }) - .transpose()?, - span: info.span, - metadata: None, - trim_end_newline: info.trim_end_newline, - } + let reader = handle.read_stream(info.id, self.get_interface())?; + ByteStream::from_result_iter(reader, info.span, ctrlc.cloned()).into() } }) } @@ -271,11 +245,11 @@ pub trait Interface: Clone + Send { Ok::<_, ShellError>((id, writer)) }; match self.prepare_pipeline_data(data, context)? { - PipelineData::Value(value, _) => { + PipelineData::Value(value, ..) => { Ok((PipelineDataHeader::Value(value), PipelineDataWriter::None)) } PipelineData::Empty => Ok((PipelineDataHeader::Empty, PipelineDataWriter::None)), - PipelineData::ListStream(stream, _) => { + PipelineData::ListStream(stream, ..) 
=> { let (id, writer) = new_stream(LIST_STREAM_HIGH_PRESSURE)?; Ok(( PipelineDataHeader::ListStream(ListStreamInfo { @@ -285,50 +259,15 @@ pub trait Interface: Clone + Send { PipelineDataWriter::ListStream(writer, stream), )) } - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata: _, - trim_end_newline, - } => { - // Create the writers and stream ids - let stdout_stream = stdout - .is_some() - .then(|| new_stream(RAW_STREAM_HIGH_PRESSURE)) - .transpose()?; - let stderr_stream = stderr - .is_some() - .then(|| new_stream(RAW_STREAM_HIGH_PRESSURE)) - .transpose()?; - let exit_code_stream = exit_code - .is_some() - .then(|| new_stream(LIST_STREAM_HIGH_PRESSURE)) - .transpose()?; - // Generate the header, with the stream ids - let header = PipelineDataHeader::ExternalStream(ExternalStreamInfo { - span, - stdout: stdout - .as_ref() - .zip(stdout_stream.as_ref()) - .map(|(stream, (id, _))| RawStreamInfo::new(*id, stream)), - stderr: stderr - .as_ref() - .zip(stderr_stream.as_ref()) - .map(|(stream, (id, _))| RawStreamInfo::new(*id, stream)), - exit_code: exit_code_stream - .as_ref() - .map(|&(id, _)| ListStreamInfo { id, span }), - trim_end_newline, - }); - // Collect the writers - let writer = PipelineDataWriter::ExternalStream { - stdout: stdout_stream.map(|(_, writer)| writer).zip(stdout), - stderr: stderr_stream.map(|(_, writer)| writer).zip(stderr), - exit_code: exit_code_stream.map(|(_, writer)| writer).zip(exit_code), - }; - Ok((header, writer)) + PipelineData::ByteStream(stream, ..) => { + let span = stream.span(); + if let Some(reader) = stream.reader() { + let (id, writer) = new_stream(RAW_STREAM_HIGH_PRESSURE)?; + let header = PipelineDataHeader::ByteStream(ByteStreamInfo { id, span }); + Ok((header, PipelineDataWriter::ByteStream(writer, reader))) + } else { + Ok((PipelineDataHeader::Empty, PipelineDataWriter::None)) + } } } } @@ -355,11 +294,7 @@ pub enum PipelineDataWriter { #[default] None, ListStream(StreamWriter, ListStream), - ExternalStream { - stdout: Option<(StreamWriter, RawStream)>, - stderr: Option<(StreamWriter, RawStream)>, - exit_code: Option<(StreamWriter, ListStream)>, - }, + ByteStream(StreamWriter, Reader), } impl PipelineDataWriter @@ -376,49 +311,16 @@ where writer.write_all(stream)?; Ok(()) } - // Write all three possible streams of an ExternalStream on separate threads. - PipelineDataWriter::ExternalStream { - stdout, - stderr, - exit_code, - } => { - thread::scope(|scope| { - let stderr_thread = stderr - .map(|(mut writer, stream)| { - thread::Builder::new() - .name("plugin stderr writer".into()) - .spawn_scoped(scope, move || { - writer.write_all(raw_stream_iter(stream)) - }) - }) - .transpose()?; - let exit_code_thread = exit_code - .map(|(mut writer, stream)| { - thread::Builder::new() - .name("plugin exit_code writer".into()) - .spawn_scoped(scope, move || writer.write_all(stream)) - }) - .transpose()?; - // Optimize for stdout: if only stdout is present, don't spawn any other - // threads. - if let Some((mut writer, stream)) = stdout { - writer.write_all(raw_stream_iter(stream))?; - } - let panicked = |thread_name: &str| { - Err(ShellError::NushellFailed { - msg: format!( - "{thread_name} thread panicked in PipelineDataWriter::write" - ), - }) - }; - stderr_thread - .map(|t| t.join().unwrap_or_else(|_| panicked("stderr"))) - .transpose()?; - exit_code_thread - .map(|t| t.join().unwrap_or_else(|_| panicked("exit_code"))) - .transpose()?; - Ok(()) - }) + // Write a byte stream. 
+ PipelineDataWriter::ByteStream(mut writer, mut reader) => { + let span = reader.span(); + let buf = &mut [0; 8192]; + writer.write_all(std::iter::from_fn(move || match reader.read(buf) { + Ok(0) => None, + Ok(len) => Some(Ok(buf[..len].to_vec())), + Err(err) => Some(Err(ShellError::from(err.into_spanned(span)))), + }))?; + Ok(()) } } } @@ -446,11 +348,3 @@ where } } } - -/// Custom iterator for [`RawStream`] that respects ctrlc, but still has binary chunks -fn raw_stream_iter(stream: RawStream) -> impl Iterator, ShellError>> { - let ctrlc = stream.ctrlc; - stream - .stream - .take_while(move |_| ctrlc.as_ref().map(|b| !b.load(Relaxed)).unwrap_or(true)) -} diff --git a/crates/nu-plugin-core/src/interface/tests.rs b/crates/nu-plugin-core/src/interface/tests.rs index ce7be52f30..fb3d737190 100644 --- a/crates/nu-plugin-core/src/interface/tests.rs +++ b/crates/nu-plugin-core/src/interface/tests.rs @@ -6,11 +6,12 @@ use super::{ Interface, InterfaceManager, PluginRead, PluginWrite, }; use nu_plugin_protocol::{ - ExternalStreamInfo, ListStreamInfo, PipelineDataHeader, PluginInput, PluginOutput, - RawStreamInfo, StreamData, StreamMessage, + ByteStreamInfo, ListStreamInfo, PipelineDataHeader, PluginInput, PluginOutput, StreamData, + StreamMessage, }; use nu_protocol::{ - DataSource, ListStream, PipelineData, PipelineMetadata, RawStream, ShellError, Span, Value, + ByteStream, ByteStreamSource, DataSource, ListStream, PipelineData, PipelineMetadata, + ShellError, Span, Value, }; use std::{path::Path, sync::Arc}; @@ -140,9 +141,9 @@ fn read_pipeline_data_value() -> Result<(), ShellError> { let header = PipelineDataHeader::Value(value.clone()); match manager.read_pipeline_data(header, None)? { - PipelineData::Value(read_value, _) => assert_eq!(value, read_value), - PipelineData::ListStream(_, _) => panic!("unexpected ListStream"), - PipelineData::ExternalStream { .. } => panic!("unexpected ExternalStream"), + PipelineData::Value(read_value, ..) => assert_eq!(value, read_value), + PipelineData::ListStream(..) => panic!("unexpected ListStream"), + PipelineData::ByteStream(..) 
=> panic!("unexpected ByteStream"), PipelineData::Empty => panic!("unexpected Empty"), } @@ -188,47 +189,25 @@ fn read_pipeline_data_list_stream() -> Result<(), ShellError> { } #[test] -fn read_pipeline_data_external_stream() -> Result<(), ShellError> { +fn read_pipeline_data_byte_stream() -> Result<(), ShellError> { let test = TestCase::new(); let mut manager = TestInterfaceManager::new(&test); let iterations = 100; let out_pattern = b"hello".to_vec(); - let err_pattern = vec![5, 4, 3, 2]; - test.add(StreamMessage::Data(14, Value::test_int(1).into())); for _ in 0..iterations { test.add(StreamMessage::Data( 12, StreamData::Raw(Ok(out_pattern.clone())), )); - test.add(StreamMessage::Data( - 13, - StreamData::Raw(Ok(err_pattern.clone())), - )); } test.add(StreamMessage::End(12)); - test.add(StreamMessage::End(13)); - test.add(StreamMessage::End(14)); let test_span = Span::new(10, 13); - let header = PipelineDataHeader::ExternalStream(ExternalStreamInfo { + let header = PipelineDataHeader::ByteStream(ByteStreamInfo { + id: 12, span: test_span, - stdout: Some(RawStreamInfo { - id: 12, - is_binary: false, - known_size: Some((out_pattern.len() * iterations) as u64), - }), - stderr: Some(RawStreamInfo { - id: 13, - is_binary: true, - known_size: None, - }), - exit_code: Some(ListStreamInfo { - id: 14, - span: Span::test_data(), - }), - trim_end_newline: true, }); let pipe = manager.read_pipeline_data(header, None)?; @@ -237,52 +216,28 @@ fn read_pipeline_data_external_stream() -> Result<(), ShellError> { manager.consume_all()?; match pipe { - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - } => { - let stdout = stdout.expect("stdout is None"); - let stderr = stderr.expect("stderr is None"); - let exit_code = exit_code.expect("exit_code is None"); - assert_eq!(test_span, span); + PipelineData::ByteStream(stream, metadata) => { + assert_eq!(test_span, stream.span()); assert!( metadata.is_some(), "expected metadata to be Some due to prepare_pipeline_data()" ); - assert!(trim_end_newline); - assert!(!stdout.is_binary); - assert!(stderr.is_binary); - - assert_eq!( - Some((out_pattern.len() * iterations) as u64), - stdout.known_size - ); - assert_eq!(None, stderr.known_size); - - // check the streams - let mut count = 0; - for chunk in stdout.stream { - assert_eq!(out_pattern, chunk?); - count += 1; + match stream.into_source() { + ByteStreamSource::Read(mut read) => { + let mut buf = Vec::new(); + read.read_to_end(&mut buf)?; + let iter = buf.chunks_exact(out_pattern.len()); + assert_eq!(iter.len(), iterations); + for chunk in iter { + assert_eq!(out_pattern, chunk) + } + } + ByteStreamSource::File(..) => panic!("unexpected byte stream source: file"), + ByteStreamSource::Child(..) 
=> { + panic!("unexpected byte stream source: child") + } } - assert_eq!(iterations, count, "stdout length"); - let mut count = 0; - - for chunk in stderr.stream { - assert_eq!(err_pattern, chunk?); - count += 1; - } - assert_eq!(iterations, count, "stderr length"); - - assert_eq!( - vec![Value::test_int(1)], - exit_code.into_iter().collect::>() - ); } _ => panic!("unexpected PipelineData: {pipe:?}"), } @@ -436,120 +391,51 @@ fn write_pipeline_data_list_stream() -> Result<(), ShellError> { } #[test] -fn write_pipeline_data_external_stream() -> Result<(), ShellError> { +fn write_pipeline_data_byte_stream() -> Result<(), ShellError> { let test = TestCase::new(); let manager = TestInterfaceManager::new(&test); let interface = manager.get_interface(); - let stdout_bufs = vec![ - b"hello".to_vec(), - b"world".to_vec(), - b"these are tests".to_vec(), - ]; - let stdout_len = stdout_bufs.iter().map(|b| b.len() as u64).sum::(); - let stderr_bufs = vec![b"error messages".to_vec(), b"go here".to_vec()]; - let exit_code = Value::test_int(7); - + let expected = "hello\nworld\nthese are tests"; let span = Span::new(400, 500); - // Set up pipeline data for an external stream - let pipe = PipelineData::ExternalStream { - stdout: Some(RawStream::new( - Box::new(stdout_bufs.clone().into_iter().map(Ok)), - None, - span, - Some(stdout_len), - )), - stderr: Some(RawStream::new( - Box::new(stderr_bufs.clone().into_iter().map(Ok)), - None, - span, - None, - )), - exit_code: Some(ListStream::new( - std::iter::once(exit_code.clone()), - Span::test_data(), - None, - )), - span, - metadata: None, - trim_end_newline: true, - }; + // Set up pipeline data for a byte stream + let data = PipelineData::ByteStream( + ByteStream::read(std::io::Cursor::new(expected), span, None), + None, + ); - let (header, writer) = interface.init_write_pipeline_data(pipe, &())?; + let (header, writer) = interface.init_write_pipeline_data(data, &())?; let info = match header { - PipelineDataHeader::ExternalStream(info) => info, + PipelineDataHeader::ByteStream(info) => info, _ => panic!("unexpected header: {header:?}"), }; writer.write()?; - let stdout_info = info.stdout.as_ref().expect("stdout info is None"); - let stderr_info = info.stderr.as_ref().expect("stderr info is None"); - let exit_code_info = info.exit_code.as_ref().expect("exit code info is None"); - assert_eq!(span, info.span); - assert!(info.trim_end_newline); - - assert_eq!(Some(stdout_len), stdout_info.known_size); - assert_eq!(None, stderr_info.known_size); // Now make sure the stream messages have been written - let mut stdout_iter = stdout_bufs.into_iter(); - let mut stderr_iter = stderr_bufs.into_iter(); - let mut exit_code_iter = std::iter::once(exit_code); + let mut actual = Vec::new(); + let mut ended = false; - let mut stdout_ended = false; - let mut stderr_ended = false; - let mut exit_code_ended = false; - - // There's no specific order these messages must come in with respect to how the streams are - // interleaved, but all of the data for each stream must be in its original order, and the - // End must come after all Data for msg in test.written() { match msg { PluginOutput::Data(id, data) => { - if id == stdout_info.id { - let result: Result, ShellError> = - data.try_into().expect("wrong data in stdout stream"); - assert_eq!( - stdout_iter.next().expect("too much data in stdout"), - result.expect("unexpected error in stdout stream") - ); - } else if id == stderr_info.id { - let result: Result, ShellError> = - data.try_into().expect("wrong data in stderr 
stream"); - assert_eq!( - stderr_iter.next().expect("too much data in stderr"), - result.expect("unexpected error in stderr stream") - ); - } else if id == exit_code_info.id { - let code: Value = data.try_into().expect("wrong data in stderr stream"); - assert_eq!( - exit_code_iter.next().expect("too much data in stderr"), - code - ); + if id == info.id { + let data: Result, ShellError> = + data.try_into().expect("wrong data in stream"); + + let data = data.expect("unexpected error in stream"); + actual.extend(data); } else { panic!("unrecognized stream id: {id}"); } } PluginOutput::End(id) => { - if id == stdout_info.id { - assert!(!stdout_ended, "double End of stdout"); - assert!(stdout_iter.next().is_none(), "unexpected end of stdout"); - stdout_ended = true; - } else if id == stderr_info.id { - assert!(!stderr_ended, "double End of stderr"); - assert!(stderr_iter.next().is_none(), "unexpected end of stderr"); - stderr_ended = true; - } else if id == exit_code_info.id { - assert!(!exit_code_ended, "double End of exit_code"); - assert!( - exit_code_iter.next().is_none(), - "unexpected end of exit_code" - ); - exit_code_ended = true; + if id == info.id { + ended = true; } else { panic!("unrecognized stream id: {id}"); } @@ -558,9 +444,8 @@ fn write_pipeline_data_external_stream() -> Result<(), ShellError> { } } - assert!(stdout_ended, "stdout did not End"); - assert!(stderr_ended, "stderr did not End"); - assert!(exit_code_ended, "exit_code did not End"); + assert_eq!(expected.as_bytes(), actual); + assert!(ended, "stream did not End"); Ok(()) } diff --git a/crates/nu-plugin-engine/src/context.rs b/crates/nu-plugin-engine/src/context.rs index 0fb7b95b4c..0b1d56c050 100644 --- a/crates/nu-plugin-engine/src/context.rs +++ b/crates/nu-plugin-engine/src/context.rs @@ -108,7 +108,7 @@ impl<'a> PluginExecutionContext for PluginExecutionCommandContext<'a> { Value::Closure { val, .. } => { ClosureEvalOnce::new(&self.engine_state, &self.stack, *val) .run_with_input(PipelineData::Empty) - .map(|data| data.into_value(span)) + .and_then(|data| data.into_value(span)) .unwrap_or_else(|err| Value::error(err, self.call.head)) } _ => value.clone(), diff --git a/crates/nu-plugin-engine/src/init.rs b/crates/nu-plugin-engine/src/init.rs index 0ba70b49c0..198a01cd1c 100644 --- a/crates/nu-plugin-engine/src/init.rs +++ b/crates/nu-plugin-engine/src/init.rs @@ -26,7 +26,7 @@ use crate::{ /// This should be larger than the largest commonly sent message to avoid excessive fragmentation. /// -/// The buffers coming from external streams are typically each 8192 bytes, so double that. +/// The buffers coming from byte streams are typically each 8192 bytes, so double that. pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384; /// Spawn the command for a plugin, in the given `mode`. After spawning, it can be passed to diff --git a/crates/nu-plugin-engine/src/interface/mod.rs b/crates/nu-plugin-engine/src/interface/mod.rs index 3447d6a907..adab9dc68d 100644 --- a/crates/nu-plugin-engine/src/interface/mod.rs +++ b/crates/nu-plugin-engine/src/interface/mod.rs @@ -519,8 +519,8 @@ impl InterfaceManager for PluginInterfaceManager { .map_data(|data| { let ctrlc = self.get_ctrlc(id)?; - // Register the streams in the response - for stream_id in data.stream_ids() { + // Register the stream in the response + if let Some(stream_id) = data.stream_id() { self.recv_stream_started(id, stream_id); } @@ -602,7 +602,7 @@ impl InterfaceManager for PluginInterfaceManager { meta, )) } - PipelineData::Empty | PipelineData::ExternalStream { .. 
} => Ok(data), + PipelineData::Empty | PipelineData::ByteStream(..) => Ok(data), } } @@ -953,7 +953,7 @@ impl PluginInterface { let call = PluginCall::CustomValueOp(value.map(|cv| cv.without_source()), op); match self.plugin_call(call, None)? { - PluginCallResponse::PipelineData(out_data) => Ok(out_data.into_value(span)), + PluginCallResponse::PipelineData(out_data) => out_data.into_value(span), PluginCallResponse::Error(err) => Err(err.into()), _ => Err(ShellError::PluginFailedToDecode { msg: format!("Received unexpected response to custom value {op_name}() call"), @@ -1091,7 +1091,7 @@ impl Interface for PluginInterface { meta, )) } - PipelineData::Empty | PipelineData::ExternalStream { .. } => Ok(data), + PipelineData::Empty | PipelineData::ByteStream(..) => Ok(data), } } } diff --git a/crates/nu-plugin-engine/src/interface/tests.rs b/crates/nu-plugin-engine/src/interface/tests.rs index 7548703191..aca59a664e 100644 --- a/crates/nu-plugin-engine/src/interface/tests.rs +++ b/crates/nu-plugin-engine/src/interface/tests.rs @@ -9,10 +9,10 @@ use crate::{ use nu_plugin_core::{interface_test_util::TestCase, Interface, InterfaceManager}; use nu_plugin_protocol::{ test_util::{expected_test_custom_value, test_plugin_custom_value}, - CallInfo, CustomValueOp, EngineCall, EngineCallResponse, EvaluatedCall, ExternalStreamInfo, + ByteStreamInfo, CallInfo, CustomValueOp, EngineCall, EngineCallResponse, EvaluatedCall, ListStreamInfo, PipelineDataHeader, PluginCall, PluginCallId, PluginCallResponse, - PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, RawStreamInfo, - StreamData, StreamMessage, + PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, StreamData, + StreamMessage, }; use nu_protocol::{ ast::{Math, Operator}, @@ -154,16 +154,9 @@ fn manager_consume_all_propagates_message_error_to_readers() -> Result<(), Shell test.add(invalid_output()); let stream = manager.read_pipeline_data( - PipelineDataHeader::ExternalStream(ExternalStreamInfo { + PipelineDataHeader::ByteStream(ByteStreamInfo { + id: 0, span: Span::test_data(), - stdout: Some(RawStreamInfo { - id: 0, - is_binary: false, - known_size: None, - }), - stderr: None, - exit_code: None, - trim_end_newline: false, }), None, )?; @@ -378,7 +371,7 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> { fake_plugin_call(&mut manager, n); } - // Check list streams, external streams + // Check list streams, byte streams manager.consume(PluginOutput::CallResponse( 0, PluginCallResponse::PipelineData(PipelineDataHeader::ListStream(ListStreamInfo { @@ -388,23 +381,9 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> { ))?; manager.consume(PluginOutput::CallResponse( 1, - PluginCallResponse::PipelineData(PipelineDataHeader::ExternalStream(ExternalStreamInfo { + PluginCallResponse::PipelineData(PipelineDataHeader::ByteStream(ByteStreamInfo { + id: 1, span: Span::test_data(), - stdout: Some(RawStreamInfo { - id: 1, - is_binary: false, - known_size: None, - }), - stderr: Some(RawStreamInfo { - id: 2, - is_binary: false, - known_size: None, - }), - exit_code: Some(ListStreamInfo { - id: 3, - span: Span::test_data(), - }), - trim_end_newline: false, })), ))?; @@ -423,22 +402,20 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> { "plugin_call_input_streams[0] should be Some(0)" ); - // ExternalStream should have three + // ByteStream should have one if let Some(sub) = manager.plugin_call_states.get(&1) { assert_eq!( - 3, 
sub.remaining_streams_to_read, - "ExternalStream remaining_streams_to_read should be 3" + 1, sub.remaining_streams_to_read, + "ByteStream remaining_streams_to_read should be 1" ); } else { - panic!("failed to find subscription for ExternalStream (1), maybe it was removed"); - } - for n in [1, 2, 3] { - assert_eq!( - Some(&1), - manager.plugin_call_input_streams.get(&n), - "plugin_call_input_streams[{n}] should be Some(1)" - ); + panic!("failed to find subscription for ByteStream (1), maybe it was removed"); } + assert_eq!( + Some(&1), + manager.plugin_call_input_streams.get(&1), + "plugin_call_input_streams[1] should be Some(1)" + ); Ok(()) } @@ -1087,7 +1064,7 @@ fn interface_run() -> Result<(), ShellError> { assert_eq!( Value::test_int(number), - result.into_value(Span::test_data()) + result.into_value(Span::test_data())?, ); assert!(test.has_unconsumed_write()); Ok(()) @@ -1136,7 +1113,7 @@ fn interface_prepare_pipeline_data_accepts_normal_values() -> Result<(), ShellEr match interface.prepare_pipeline_data(PipelineData::Value(value.clone(), None), &state) { Ok(data) => assert_eq!( value.get_type(), - data.into_value(Span::test_data()).get_type() + data.into_value(Span::test_data())?.get_type(), ), Err(err) => panic!("failed to accept {value:?}: {err}"), } diff --git a/crates/nu-plugin-protocol/src/lib.rs b/crates/nu-plugin-protocol/src/lib.rs index e40136ca56..ea27f82654 100644 --- a/crates/nu-plugin-protocol/src/lib.rs +++ b/crates/nu-plugin-protocol/src/lib.rs @@ -22,7 +22,7 @@ mod tests; pub mod test_util; use nu_protocol::{ - ast::Operator, engine::Closure, Config, LabeledError, PipelineData, PluginSignature, RawStream, + ast::Operator, engine::Closure, Config, LabeledError, PipelineData, PluginSignature, ShellError, Span, Spanned, Value, }; use serde::{Deserialize, Serialize}; @@ -82,32 +82,20 @@ pub enum PipelineDataHeader { /// /// Items are sent via [`StreamData`] ListStream(ListStreamInfo), - /// Initiate [`nu_protocol::PipelineData::ExternalStream`]. + /// Initiate [`nu_protocol::PipelineData::ByteStream`]. 
/// /// Items are sent via [`StreamData`] - ExternalStream(ExternalStreamInfo), + ByteStream(ByteStreamInfo), } impl PipelineDataHeader { - /// Return a list of stream IDs embedded in the header - pub fn stream_ids(&self) -> Vec<StreamId> { + /// Return the stream ID, if any, embedded in the header + pub fn stream_id(&self) -> Option<StreamId> { match self { - PipelineDataHeader::Empty => vec![], - PipelineDataHeader::Value(_) => vec![], - PipelineDataHeader::ListStream(info) => vec![info.id], - PipelineDataHeader::ExternalStream(info) => { - let mut out = vec![]; - if let Some(stdout) = &info.stdout { - out.push(stdout.id); - } - if let Some(stderr) = &info.stderr { - out.push(stderr.id); - } - if let Some(exit_code) = &info.exit_code { - out.push(exit_code.id); - } - out - } + PipelineDataHeader::Empty => None, + PipelineDataHeader::Value(_) => None, + PipelineDataHeader::ListStream(info) => Some(info.id), + PipelineDataHeader::ByteStream(info) => Some(info.id), } } } @@ -119,32 +107,11 @@ pub struct ListStreamInfo { pub span: Span, } -/// Additional information about external streams +/// Additional information about byte streams #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] -pub struct ExternalStreamInfo { - pub span: Span, - pub stdout: Option<RawStreamInfo>, - pub stderr: Option<RawStreamInfo>, - pub exit_code: Option<ListStreamInfo>, - pub trim_end_newline: bool, -} - -/// Additional information about raw (byte) streams -#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] -pub struct RawStreamInfo { +pub struct ByteStreamInfo { pub id: StreamId, - pub is_binary: bool, - pub known_size: Option<u64>, -} - -impl RawStreamInfo { - pub fn new(id: StreamId, stream: &RawStream) -> Self { - RawStreamInfo { - id, - is_binary: stream.is_binary, - known_size: stream.known_size, - } - } + pub span: Span, } /// Calls that a plugin can execute. The type parameter determines the input type. @@ -380,7 +347,7 @@ impl PluginCallResponse<PipelineData> { PipelineData::Empty => false, PipelineData::Value(..) => false, PipelineData::ListStream(..) => true, - PipelineData::ExternalStream { .. } => true, + PipelineData::ByteStream(..) => true, }, _ => false, } diff --git a/crates/nu-plugin-test-support/src/lib.rs b/crates/nu-plugin-test-support/src/lib.rs index 8aa675fd1f..caa7cbac1a 100644 --- a/crates/nu-plugin-test-support/src/lib.rs +++ b/crates/nu-plugin-test-support/src/lib.rs @@ -82,7 +82,7 @@ //! let input = vec![Value::test_string("FooBar")].into_pipeline_data(Span::test_data(), None); //! let output = PluginTest::new("lowercase", LowercasePlugin.into())? //! .eval_with("lowercase", input)? -//! .into_value(Span::test_data()); +//! .into_value(Span::test_data())?; //! //! assert_eq!( //! Value::test_list(vec![ diff --git a/crates/nu-plugin-test-support/src/plugin_test.rs b/crates/nu-plugin-test-support/src/plugin_test.rs index 18eee356ef..3d6b3eec23 100644 --- a/crates/nu-plugin-test-support/src/plugin_test.rs +++ b/crates/nu-plugin-test-support/src/plugin_test.rs @@ -93,7 +93,7 @@ impl PluginTest { /// "my-command", /// vec![Value::test_int(42)].into_pipeline_data(Span::test_data(), None) /// )? - /// .into_value(Span::test_data()); + /// .into_value(Span::test_data())?; /// assert_eq!(Value::test_string("42"), result); /// # Ok(()) /// # } @@ -136,33 +136,44 @@ impl PluginTest { // Serialize custom values in the input let source = self.source.clone(); - let input = input.map( - move |mut value| { - let result = PluginCustomValue::serialize_custom_values_in(&mut value) - // Make sure to mark them with the source so they pass correctly, too.
- .and_then(|_| PluginCustomValueWithSource::add_source_in(&mut value, &source)); - match result { - Ok(()) => value, - Err(err) => Value::error(err, value.span()), - } - }, - None, - )?; + let input = if matches!(input, PipelineData::ByteStream(..)) { + input + } else { + input.map( + move |mut value| { + let result = PluginCustomValue::serialize_custom_values_in(&mut value) + // Make sure to mark them with the source so they pass correctly, too. + .and_then(|_| { + PluginCustomValueWithSource::add_source_in(&mut value, &source) + }); + match result { + Ok(()) => value, + Err(err) => Value::error(err, value.span()), + } + }, + None, + )? + }; // Eval the block with the input let mut stack = Stack::new().capture(); - eval_block::(&self.engine_state, &mut stack, &block, input)?.map( - |mut value| { - // Make sure to deserialize custom values - let result = PluginCustomValueWithSource::remove_source_in(&mut value) - .and_then(|_| PluginCustomValue::deserialize_custom_values_in(&mut value)); - match result { - Ok(()) => value, - Err(err) => Value::error(err, value.span()), - } - }, - None, - ) + let data = eval_block::(&self.engine_state, &mut stack, &block, input)?; + if matches!(data, PipelineData::ByteStream(..)) { + Ok(data) + } else { + data.map( + |mut value| { + // Make sure to deserialize custom values + let result = PluginCustomValueWithSource::remove_source_in(&mut value) + .and_then(|_| PluginCustomValue::deserialize_custom_values_in(&mut value)); + match result { + Ok(()) => value, + Err(err) => Value::error(err, value.span()), + } + }, + None, + ) + } } /// Evaluate some Nushell source code with the plugin commands in scope. @@ -176,7 +187,7 @@ impl PluginTest { /// # fn test(MyPlugin: impl Plugin + Send + 'static) -> Result<(), ShellError> { /// let result = PluginTest::new("my_plugin", MyPlugin.into())? /// .eval("42 | my-command")? - /// .into_value(Span::test_data()); + /// .into_value(Span::test_data())?; /// assert_eq!(Value::test_string("42"), result); /// # Ok(()) /// # } @@ -219,7 +230,7 @@ impl PluginTest { if let Some(expectation) = &example.result { match self.eval(example.example) { Ok(data) => { - let mut value = data.into_value(Span::test_data()); + let mut value = data.into_value(Span::test_data())?; // Set all of the spans in the value to test_data() to avoid unnecessary // differences when printing diff --git a/crates/nu-plugin-test-support/tests/custom_value/mod.rs b/crates/nu-plugin-test-support/tests/custom_value/mod.rs index aaae5538ff..f703a92e33 100644 --- a/crates/nu-plugin-test-support/tests/custom_value/mod.rs +++ b/crates/nu-plugin-test-support/tests/custom_value/mod.rs @@ -143,7 +143,7 @@ fn test_into_int_from_u32() -> Result<(), ShellError> { "into int from u32", PipelineData::Value(CustomU32(42).into_value(Span::test_data()), None), )? - .into_value(Span::test_data()); + .into_value(Span::test_data())?; assert_eq!(Value::test_int(42), result); Ok(()) } diff --git a/crates/nu-plugin-test-support/tests/hello/mod.rs b/crates/nu-plugin-test-support/tests/hello/mod.rs index 00886f1888..424940f156 100644 --- a/crates/nu-plugin-test-support/tests/hello/mod.rs +++ b/crates/nu-plugin-test-support/tests/hello/mod.rs @@ -80,7 +80,7 @@ fn test_requiring_nu_cmd_lang_commands() -> Result<(), ShellError> { let result = PluginTest::new("hello", HelloPlugin.into())? .eval("do { let greeting = hello; $greeting }")? 
- .into_value(Span::test_data()); + .into_value(Span::test_data())?; assert_eq!(Value::test_string("Hello, World!"), result); diff --git a/crates/nu-plugin-test-support/tests/lowercase/mod.rs b/crates/nu-plugin-test-support/tests/lowercase/mod.rs index 33446cea86..0072a08aa2 100644 --- a/crates/nu-plugin-test-support/tests/lowercase/mod.rs +++ b/crates/nu-plugin-test-support/tests/lowercase/mod.rs @@ -73,7 +73,7 @@ fn test_lowercase_using_eval_with() -> Result<(), ShellError> { assert_eq!( Value::test_list(vec![Value::test_string("hello world")]), - result.into_value(Span::test_data()) + result.into_value(Span::test_data())? ); Ok(()) diff --git a/crates/nu-plugin/src/plugin/command.rs b/crates/nu-plugin/src/plugin/command.rs index ad8ecd7d9c..5def950b0b 100644 --- a/crates/nu-plugin/src/plugin/command.rs +++ b/crates/nu-plugin/src/plugin/command.rs @@ -313,7 +313,7 @@ where // Unwrap the PipelineData from input, consuming the potential stream, and pass it to the // simpler signature in Plugin let span = input.span().unwrap_or(call.head); - let input_value = input.into_value(span); + let input_value = input.into_value(span)?; // Wrap the output in PipelineData::Value ::run(self, plugin, engine, call, &input_value) .map(|value| PipelineData::Value(value, None)) diff --git a/crates/nu-plugin/src/plugin/interface/mod.rs b/crates/nu-plugin/src/plugin/interface/mod.rs index 70e143ece5..e3e9679471 100644 --- a/crates/nu-plugin/src/plugin/interface/mod.rs +++ b/crates/nu-plugin/src/plugin/interface/mod.rs @@ -345,7 +345,7 @@ impl InterfaceManager for EngineInterfaceManager { }); Ok(PipelineData::ListStream(stream, meta)) } - PipelineData::Empty | PipelineData::ExternalStream { .. } => Ok(data), + PipelineData::Empty | PipelineData::ByteStream(..) => Ok(data), } } } @@ -850,7 +850,7 @@ impl EngineInterface { let input = input.map_or_else(|| PipelineData::Empty, |v| PipelineData::Value(v, None)); let output = self.eval_closure_with_stream(closure, positional, input, true, false)?; // Unwrap an error value - match output.into_value(closure.span) { + match output.into_value(closure.span)? { Value::Error { error, .. } => Err(*error), value => Ok(value), } @@ -920,7 +920,7 @@ impl Interface for EngineInterface { }); Ok(PipelineData::ListStream(stream, meta)) } - PipelineData::Empty | PipelineData::ExternalStream { .. } => Ok(data), + PipelineData::Empty | PipelineData::ByteStream(..) 
=> Ok(data), } } } diff --git a/crates/nu-plugin/src/plugin/interface/tests.rs b/crates/nu-plugin/src/plugin/interface/tests.rs index 17018cbc00..ed04190712 100644 --- a/crates/nu-plugin/src/plugin/interface/tests.rs +++ b/crates/nu-plugin/src/plugin/interface/tests.rs @@ -4,10 +4,9 @@ use super::{EngineInterfaceManager, ReceivedPluginCall}; use nu_plugin_core::{interface_test_util::TestCase, Interface, InterfaceManager}; use nu_plugin_protocol::{ test_util::{expected_test_custom_value, test_plugin_custom_value, TestCustomValue}, - CallInfo, CustomValueOp, EngineCall, EngineCallId, EngineCallResponse, EvaluatedCall, - ExternalStreamInfo, ListStreamInfo, PipelineDataHeader, PluginCall, PluginCallResponse, - PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, RawStreamInfo, - StreamData, + ByteStreamInfo, CallInfo, CustomValueOp, EngineCall, EngineCallId, EngineCallResponse, + EvaluatedCall, ListStreamInfo, PipelineDataHeader, PluginCall, PluginCallResponse, + PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, StreamData, }; use nu_protocol::{ engine::Closure, Config, CustomValue, IntoInterruptiblePipelineData, LabeledError, @@ -158,16 +157,9 @@ fn manager_consume_all_propagates_message_error_to_readers() -> Result<(), Shell test.add(invalid_input()); let stream = manager.read_pipeline_data( - PipelineDataHeader::ExternalStream(ExternalStreamInfo { + PipelineDataHeader::ByteStream(ByteStreamInfo { + id: 0, span: Span::test_data(), - stdout: Some(RawStreamInfo { - id: 0, - is_binary: false, - known_size: None, - }), - stderr: None, - exit_code: None, - trim_end_newline: false, }), None, )?; @@ -1046,7 +1038,7 @@ fn interface_eval_closure_with_stream() -> Result<(), ShellError> { true, false, )? - .into_value(Span::test_data()); + .into_value(Span::test_data())?; assert_eq!(Value::test_int(2), result); diff --git a/crates/nu-plugin/src/plugin/mod.rs b/crates/nu-plugin/src/plugin/mod.rs index 30ed196dc6..85283aadd0 100644 --- a/crates/nu-plugin/src/plugin/mod.rs +++ b/crates/nu-plugin/src/plugin/mod.rs @@ -30,7 +30,7 @@ pub use interface::{EngineInterface, EngineInterfaceManager}; /// This should be larger than the largest commonly sent message to avoid excessive fragmentation. /// -/// The buffers coming from external streams are typically each 8192 bytes, so double that. +/// The buffers coming from byte streams are typically each 8192 bytes, so double that. 
#[allow(dead_code)] pub(crate) const OUTPUT_BUFFER_SIZE: usize = 16384; diff --git a/crates/nu-protocol/Cargo.toml b/crates/nu-protocol/Cargo.toml index be45738447..ae04c20ddb 100644 --- a/crates/nu-protocol/Cargo.toml +++ b/crates/nu-protocol/Cargo.toml @@ -31,6 +31,10 @@ serde = { workspace = true, default-features = false } serde_json = { workspace = true, optional = true } thiserror = "1.0" typetag = "0.2" +os_pipe = { workspace = true, features = ["io_safety"] } + +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, default-features = false, features = ["signal"] } [features] plugin = [ diff --git a/crates/nu-protocol/src/debugger/debugger_trait.rs b/crates/nu-protocol/src/debugger/debugger_trait.rs index 7a842f6c28..69d395fb98 100644 --- a/crates/nu-protocol/src/debugger/debugger_trait.rs +++ b/crates/nu-protocol/src/debugger/debugger_trait.rs @@ -44,7 +44,7 @@ pub trait DebugContext: Clone + Copy + Debug { fn leave_element( engine_state: &EngineState, element: &PipelineElement, - result: &Result<(PipelineData, bool), ShellError>, + result: &Result<PipelineData, ShellError>, ) { } } @@ -77,7 +77,7 @@ impl DebugContext for WithDebug { fn leave_element( engine_state: &EngineState, element: &PipelineElement, - result: &Result<(PipelineData, bool), ShellError>, + result: &Result<PipelineData, ShellError>, ) { if let Ok(mut debugger) = engine_state.debugger.lock() { debugger @@ -128,7 +128,7 @@ pub trait Debugger: Send + Debug { &mut self, engine_state: &EngineState, element: &PipelineElement, - result: &Result<(PipelineData, bool), ShellError>, + result: &Result<PipelineData, ShellError>, ) { } diff --git a/crates/nu-protocol/src/debugger/profiler.rs b/crates/nu-protocol/src/debugger/profiler.rs index 9d5bece0ab..53b9d0555a 100644 --- a/crates/nu-protocol/src/debugger/profiler.rs +++ b/crates/nu-protocol/src/debugger/profiler.rs @@ -158,7 +158,7 @@ impl Debugger for Profiler { &mut self, _engine_state: &EngineState, element: &PipelineElement, - result: &Result<(PipelineData, bool), ShellError>, + result: &Result<PipelineData, ShellError>, ) { if self.depth > self.max_depth { return; @@ -167,12 +167,10 @@ impl Debugger for Profiler { let element_span = element.expr.span; let out_opt = self.collect_values.then(|| match result { - Ok((pipeline_data, _not_sure_what_this_is)) => match pipeline_data { + Ok(pipeline_data) => match pipeline_data { PipelineData::Value(val, ..) => val.clone(), PipelineData::ListStream(..) => Value::string("list stream", element_span), - PipelineData::ExternalStream { .. } => { - Value::string("external stream", element_span) - } + PipelineData::ByteStream(..)
=> Value::string("byte stream", element_span), _ => Value::nothing(element_span), }, Err(e) => Value::error(e.clone(), element_span), diff --git a/crates/nu-protocol/src/errors/shell_error.rs b/crates/nu-protocol/src/errors/shell_error.rs index e1d7ade338..525f32e925 100644 --- a/crates/nu-protocol/src/errors/shell_error.rs +++ b/crates/nu-protocol/src/errors/shell_error.rs @@ -1,5 +1,6 @@ use miette::Diagnostic; use serde::{Deserialize, Serialize}; +use std::io; use thiserror::Error; use crate::{ @@ -1374,42 +1375,79 @@ impl ShellError { } } -impl From for ShellError { - fn from(input: std::io::Error) -> ShellError { - ShellError::IOError { - msg: format!("{input:?}"), +impl From for ShellError { + fn from(error: io::Error) -> ShellError { + if error.kind() == io::ErrorKind::Other { + match error.into_inner() { + Some(err) => match err.downcast() { + Ok(err) => *err, + Err(err) => Self::IOError { + msg: err.to_string(), + }, + }, + None => Self::IOError { + msg: "unknown error".into(), + }, + } + } else { + Self::IOError { + msg: error.to_string(), + } } } } -impl From> for ShellError { - fn from(error: Spanned) -> Self { - ShellError::IOErrorSpanned { - msg: error.item.to_string(), - span: error.span, +impl From> for ShellError { + fn from(error: Spanned) -> Self { + let Spanned { item: error, span } = error; + if error.kind() == io::ErrorKind::Other { + match error.into_inner() { + Some(err) => match err.downcast() { + Ok(err) => *err, + Err(err) => Self::IOErrorSpanned { + msg: err.to_string(), + span, + }, + }, + None => Self::IOErrorSpanned { + msg: "unknown error".into(), + span, + }, + } + } else { + Self::IOErrorSpanned { + msg: error.to_string(), + span, + } } } } -impl std::convert::From> for ShellError { - fn from(input: Box) -> ShellError { +impl From for io::Error { + fn from(error: ShellError) -> Self { + io::Error::new(io::ErrorKind::Other, error) + } +} + +impl From> for ShellError { + fn from(error: Box) -> ShellError { ShellError::IOError { - msg: input.to_string(), + msg: error.to_string(), } } } impl From> for ShellError { - fn from(input: Box) -> ShellError { + fn from(error: Box) -> ShellError { ShellError::IOError { - msg: format!("{input:?}"), + msg: format!("{error:?}"), } } } impl From for ShellError { - fn from(value: super::LabeledError) -> Self { - ShellError::LabeledError(Box::new(value)) + fn from(error: super::LabeledError) -> Self { + ShellError::LabeledError(Box::new(error)) } } diff --git a/crates/nu-protocol/src/eval_const.rs b/crates/nu-protocol/src/eval_const.rs index 140a8303d9..4cc7e25324 100644 --- a/crates/nu-protocol/src/eval_const.rs +++ b/crates/nu-protocol/src/eval_const.rs @@ -317,7 +317,7 @@ impl Eval for EvalConst { ) -> Result { // TODO: Allow debugging const eval // TODO: eval.rs uses call.head for the span rather than expr.span - Ok(eval_const_call(working_set, call, PipelineData::empty())?.into_value(span)) + eval_const_call(working_set, call, PipelineData::empty())?.into_value(span) } fn eval_external_call( @@ -339,10 +339,7 @@ impl Eval for EvalConst { ) -> Result { // TODO: Allow debugging const eval let block = working_set.get_block(block_id); - Ok( - eval_const_subexpression(working_set, block, PipelineData::empty(), span)? 
- .into_value(span), - ) + eval_const_subexpression(working_set, block, PipelineData::empty(), span)?.into_value(span) } fn regex_match( diff --git a/crates/nu-protocol/src/lib.rs b/crates/nu-protocol/src/lib.rs index f5842b5b3a..d09186cf46 100644 --- a/crates/nu-protocol/src/lib.rs +++ b/crates/nu-protocol/src/lib.rs @@ -11,14 +11,14 @@ mod example; mod id; mod lev_distance; mod module; -mod pipeline_data; +mod pipeline; #[cfg(feature = "plugin")] mod plugin; +pub mod process; mod signature; pub mod span; mod syntax_shape; mod ty; -pub mod util; mod value; pub use alias::*; @@ -31,12 +31,11 @@ pub use example::*; pub use id::*; pub use lev_distance::levenshtein_distance; pub use module::*; -pub use pipeline_data::*; +pub use pipeline::*; #[cfg(feature = "plugin")] pub use plugin::*; pub use signature::*; pub use span::*; pub use syntax_shape::*; pub use ty::*; -pub use util::BufferedReader; pub use value::*; diff --git a/crates/nu-protocol/src/pipeline/byte_stream.rs b/crates/nu-protocol/src/pipeline/byte_stream.rs new file mode 100644 index 0000000000..f57aecacba --- /dev/null +++ b/crates/nu-protocol/src/pipeline/byte_stream.rs @@ -0,0 +1,822 @@ +use crate::{ + process::{ChildPipe, ChildProcess, ExitStatus}, + ErrSpan, IntoSpanned, OutDest, PipelineData, ShellError, Span, Value, +}; +#[cfg(unix)] +use std::os::fd::OwnedFd; +#[cfg(windows)] +use std::os::windows::io::OwnedHandle; +use std::{ + fmt::Debug, + fs::File, + io::{self, BufRead, BufReader, Cursor, ErrorKind, Read, Write}, + process::Stdio, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, + thread, +}; + +/// The source of bytes for a [`ByteStream`]. +/// +/// Currently, there are only three possibilities: +/// 1. `Read` (any `dyn` type that implements [`Read`]) +/// 2. [`File`] +/// 3. [`ChildProcess`] +pub enum ByteStreamSource { + Read(Box), + File(File), + Child(Box), +} + +impl ByteStreamSource { + fn reader(self) -> Option { + match self { + ByteStreamSource::Read(read) => Some(SourceReader::Read(read)), + ByteStreamSource::File(file) => Some(SourceReader::File(file)), + ByteStreamSource::Child(mut child) => child.stdout.take().map(|stdout| match stdout { + ChildPipe::Pipe(pipe) => SourceReader::File(convert_file(pipe)), + ChildPipe::Tee(tee) => SourceReader::Read(tee), + }), + } + } +} + +enum SourceReader { + Read(Box), + File(File), +} + +impl Read for SourceReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + SourceReader::Read(reader) => reader.read(buf), + SourceReader::File(file) => file.read(buf), + } + } +} + +/// A potentially infinite, interruptible stream of bytes. +/// +/// The data of a [`ByteStream`] can be accessed using one of the following methods: +/// - [`reader`](ByteStream::reader): returns a [`Read`]-able type to get the raw bytes in the stream. +/// - [`lines`](ByteStream::lines): splits the bytes on lines and returns an [`Iterator`] +/// where each item is a `Result`. +/// - [`chunks`](ByteStream::chunks): returns an [`Iterator`] of [`Value`]s where each value is either a string or binary. +/// Try not to use this method if possible. Rather, please use [`reader`](ByteStream::reader) +/// (or [`lines`](ByteStream::lines) if it matches the situation). +/// +/// Additionally, there are few methods to collect a [`Bytestream`] into memory: +/// - [`into_bytes`](ByteStream::into_bytes): collects all bytes into a [`Vec`]. +/// - [`into_string`](ByteStream::into_string): collects all bytes into a [`String`], erroring if utf-8 decoding failed. 
+/// - [`into_value`](ByteStream::into_value): collects all bytes into a string [`Value`]. +/// If utf-8 decoding failed, then a binary [`Value`] is returned instead. +/// +/// There are also a few other methods to consume all the data of a [`Bytestream`]: +/// - [`drain`](ByteStream::drain): consumes all bytes and outputs nothing. +/// - [`write_to`](ByteStream::write_to): writes all bytes to the given [`Write`] destination. +/// - [`print`](ByteStream::print): a convenience wrapper around [`write_to`](ByteStream::write_to). +/// It prints all bytes to stdout or stderr. +/// +/// Internally, [`ByteStream`]s currently come in three flavors according to [`ByteStreamSource`]. +/// See its documentation for more information. +pub struct ByteStream { + stream: ByteStreamSource, + span: Span, + ctrlc: Option>, + known_size: Option, +} + +impl ByteStream { + /// Create a new [`ByteStream`] from a [`ByteStreamSource`]. + pub fn new(stream: ByteStreamSource, span: Span, interrupt: Option>) -> Self { + Self { + stream, + span, + ctrlc: interrupt, + known_size: None, + } + } + + /// Create a new [`ByteStream`] from a [`ByteStreamSource::Read`]. + pub fn read( + reader: impl Read + Send + 'static, + span: Span, + interrupt: Option>, + ) -> Self { + Self::new(ByteStreamSource::Read(Box::new(reader)), span, interrupt) + } + + /// Create a new [`ByteStream`] from a [`ByteStreamSource::File`]. + pub fn file(file: File, span: Span, interrupt: Option>) -> Self { + Self::new(ByteStreamSource::File(file), span, interrupt) + } + + /// Create a new [`ByteStream`] from a [`ByteStreamSource::Child`]. + pub fn child(child: ChildProcess, span: Span) -> Self { + Self::new(ByteStreamSource::Child(Box::new(child)), span, None) + } + + /// Create a new [`ByteStream`] that reads from stdin. + pub fn stdin(span: Span) -> Result { + let stdin = os_pipe::dup_stdin().err_span(span)?; + let source = ByteStreamSource::File(convert_file(stdin)); + Ok(Self::new(source, span, None)) + } + + /// Create a new [`ByteStream`] from an [`Iterator`] of bytes slices. + /// + /// The returned [`ByteStream`] will have a [`ByteStreamSource`] of `Read`. + pub fn from_iter(iter: I, span: Span, interrupt: Option>) -> Self + where + I: IntoIterator, + I::IntoIter: Send + 'static, + I::Item: AsRef<[u8]> + Default + Send + 'static, + { + let iter = iter.into_iter(); + let cursor = Some(Cursor::new(I::Item::default())); + Self::read(ReadIterator { iter, cursor }, span, interrupt) + } + + /// Create a new [`ByteStream`] from an [`Iterator`] of [`Result`] bytes slices. + /// + /// The returned [`ByteStream`] will have a [`ByteStreamSource`] of `Read`. + pub fn from_result_iter(iter: I, span: Span, interrupt: Option>) -> Self + where + I: IntoIterator>, + I::IntoIter: Send + 'static, + T: AsRef<[u8]> + Default + Send + 'static, + { + let iter = iter.into_iter(); + let cursor = Some(Cursor::new(T::default())); + Self::read(ReadResultIterator { iter, cursor }, span, interrupt) + } + + /// Set the known size, in number of bytes, of the [`ByteStream`]. + pub fn with_known_size(mut self, size: Option) -> Self { + self.known_size = size; + self + } + + /// Get a reference to the inner [`ByteStreamSource`] of the [`ByteStream`]. + pub fn source(&self) -> &ByteStreamSource { + &self.stream + } + + /// Get a mutable reference to the inner [`ByteStreamSource`] of the [`ByteStream`]. + pub fn source_mut(&mut self) -> &mut ByteStreamSource { + &mut self.stream + } + + /// Returns the [`Span`] associated with the [`ByteStream`]. 
+ pub fn span(&self) -> Span { + self.span + } + + /// Returns the known size, in number of bytes, of the [`ByteStream`]. + pub fn known_size(&self) -> Option { + self.known_size + } + + /// Convert the [`ByteStream`] into its [`Reader`] which allows one to [`Read`] the raw bytes of the stream. + /// + /// [`Reader`] is buffered and also implements [`BufRead`]. + /// + /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`] and the child has no stdout, + /// then the stream is considered empty and `None` will be returned. + pub fn reader(self) -> Option { + let reader = self.stream.reader()?; + Some(Reader { + reader: BufReader::new(reader), + span: self.span, + ctrlc: self.ctrlc, + }) + } + + /// Convert the [`ByteStream`] into a [`Lines`] iterator where each element is a `Result`. + /// + /// There is no limit on how large each line will be. Ending new lines (`\n` or `\r\n`) are + /// stripped from each line. If a line fails to be decoded as utf-8, then it will become a [`ShellError`]. + /// + /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`] and the child has no stdout, + /// then the stream is considered empty and `None` will be returned. + pub fn lines(self) -> Option { + let reader = self.stream.reader()?; + Some(Lines { + reader: BufReader::new(reader), + span: self.span, + ctrlc: self.ctrlc, + }) + } + + /// Convert the [`ByteStream`] into a [`Chunks`] iterator where each element is a `Result`. + /// + /// Each call to [`next`](Iterator::next) reads the currently available data from the byte stream source, + /// up to a maximum size. If the chunk of bytes, or an expected portion of it, succeeds utf-8 decoding, + /// then it is returned as a [`Value::String`]. Otherwise, it is turned into a [`Value::Binary`]. + /// Any and all newlines are kept intact in each chunk. + /// + /// Where possible, prefer [`reader`](ByteStream::reader) or [`lines`](ByteStream::lines) over this method. + /// Those methods are more likely to be used in a semantically correct way + /// (and [`reader`](ByteStream::reader) is more efficient too). + /// + /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`] and the child has no stdout, + /// then the stream is considered empty and `None` will be returned. + pub fn chunks(self) -> Option { + let reader = self.stream.reader()?; + Some(Chunks { + reader: BufReader::new(reader), + span: self.span, + ctrlc: self.ctrlc, + leftover: Vec::new(), + }) + } + + /// Convert the [`ByteStream`] into its inner [`ByteStreamSource`]. + pub fn into_source(self) -> ByteStreamSource { + self.stream + } + + /// Attempt to convert the [`ByteStream`] into a [`Stdio`]. + /// + /// This will succeed if the [`ByteStreamSource`] of the [`ByteStream`] is either: + /// - [`File`](ByteStreamSource::File) + /// - [`Child`](ByteStreamSource::Child) and the child has a stdout that is `Some(ChildPipe::Pipe(..))`. + /// + /// All other cases return an `Err` with the original [`ByteStream`] in it. + pub fn into_stdio(mut self) -> Result { + match self.stream { + ByteStreamSource::Read(..) => Err(self), + ByteStreamSource::File(file) => Ok(file.into()), + ByteStreamSource::Child(child) => { + if let ChildProcess { + stdout: Some(ChildPipe::Pipe(stdout)), + stderr, + .. + } = *child + { + debug_assert!(stderr.is_none(), "stderr should not exist"); + Ok(stdout.into()) + } else { + self.stream = ByteStreamSource::Child(child); + Err(self) + } + } + } + } + + /// Attempt to convert the [`ByteStream`] into a [`ChildProcess`]. 
+ /// + /// This will only succeed if the [`ByteStreamSource`] of the [`ByteStream`] is [`Child`](ByteStreamSource::Child). + /// All other cases return an `Err` with the original [`ByteStream`] in it. + pub fn into_child(self) -> Result { + if let ByteStreamSource::Child(child) = self.stream { + Ok(*child) + } else { + Err(self) + } + } + + /// Collect all the bytes of the [`ByteStream`] into a [`Vec`]. + /// + /// Any trailing new lines are kept in the returned [`Vec`]. + pub fn into_bytes(self) -> Result, ShellError> { + // todo!() ctrlc + match self.stream { + ByteStreamSource::Read(mut read) => { + let mut buf = Vec::new(); + read.read_to_end(&mut buf).err_span(self.span)?; + Ok(buf) + } + ByteStreamSource::File(mut file) => { + let mut buf = Vec::new(); + file.read_to_end(&mut buf).err_span(self.span)?; + Ok(buf) + } + ByteStreamSource::Child(child) => child.into_bytes(), + } + } + + /// Collect all the bytes of the [`ByteStream`] into a [`String`]. + /// + /// The trailing new line (`\n` or `\r\n`), if any, is removed from the [`String`] prior to being returned. + /// + /// If utf-8 decoding fails, an error is returned. + pub fn into_string(self) -> Result { + let span = self.span; + let bytes = self.into_bytes()?; + let mut string = String::from_utf8(bytes).map_err(|_| ShellError::NonUtf8 { span })?; + trim_end_newline(&mut string); + Ok(string) + } + + /// Collect all the bytes of the [`ByteStream`] into a [`Value`]. + /// + /// If the collected bytes are successfully decoded as utf-8, then a [`Value::String`] is returned. + /// The trailing new line (`\n` or `\r\n`), if any, is removed from the [`String`] prior to being returned. + /// Otherwise, a [`Value::Binary`] is returned with any trailing new lines preserved. + pub fn into_value(self) -> Result { + let span = self.span; + let bytes = self.into_bytes()?; + let value = match String::from_utf8(bytes) { + Ok(mut str) => { + trim_end_newline(&mut str); + Value::string(str, span) + } + Err(err) => Value::binary(err.into_bytes(), span), + }; + Ok(value) + } + + /// Consume and drop all bytes of the [`ByteStream`]. + /// + /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`], + /// then the [`ExitStatus`] of the [`ChildProcess`] is returned. + pub fn drain(self) -> Result, ShellError> { + match self.stream { + ByteStreamSource::Read(mut read) => { + copy_with_interrupt(&mut read, &mut io::sink(), self.span, self.ctrlc.as_deref())?; + Ok(None) + } + ByteStreamSource::File(_) => Ok(None), + ByteStreamSource::Child(child) => Ok(Some(child.wait()?)), + } + } + + /// Print all bytes of the [`ByteStream`] to stdout or stderr. + /// + /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`], + /// then the [`ExitStatus`] of the [`ChildProcess`] is returned. + pub fn print(self, to_stderr: bool) -> Result, ShellError> { + if to_stderr { + self.write_to(&mut io::stderr()) + } else { + self.write_to(&mut io::stdout()) + } + } + + /// Write all bytes of the [`ByteStream`] to `dest`. + /// + /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`], + /// then the [`ExitStatus`] of the [`ChildProcess`] is returned. 
+ pub fn write_to(self, dest: &mut impl Write) -> Result, ShellError> { + let span = self.span; + let ctrlc = self.ctrlc.as_deref(); + match self.stream { + ByteStreamSource::Read(mut read) => { + copy_with_interrupt(&mut read, dest, span, ctrlc)?; + Ok(None) + } + ByteStreamSource::File(mut file) => { + copy_with_interrupt(&mut file, dest, span, ctrlc)?; + Ok(None) + } + ByteStreamSource::Child(mut child) => { + // All `OutDest`s except `OutDest::Capture` will cause `stderr` to be `None`. + // Only `save`, `tee`, and `complete` set the stderr `OutDest` to `OutDest::Capture`, + // and those commands have proper simultaneous handling of stdout and stderr. + debug_assert!(child.stderr.is_none(), "stderr should not exist"); + + if let Some(stdout) = child.stdout.take() { + match stdout { + ChildPipe::Pipe(mut pipe) => { + copy_with_interrupt(&mut pipe, dest, span, ctrlc)?; + } + ChildPipe::Tee(mut tee) => { + copy_with_interrupt(&mut tee, dest, span, ctrlc)?; + } + } + } + Ok(Some(child.wait()?)) + } + } + } + + pub(crate) fn write_to_out_dests( + self, + stdout: &OutDest, + stderr: &OutDest, + ) -> Result, ShellError> { + let span = self.span; + let ctrlc = self.ctrlc.as_deref(); + + match self.stream { + ByteStreamSource::Read(read) => { + write_to_out_dest(read, stdout, true, span, ctrlc)?; + Ok(None) + } + ByteStreamSource::File(mut file) => { + match stdout { + OutDest::Pipe | OutDest::Capture | OutDest::Null => {} + OutDest::Inherit => { + copy_with_interrupt(&mut file, &mut io::stdout(), span, ctrlc)?; + } + OutDest::File(f) => { + copy_with_interrupt(&mut file, &mut f.as_ref(), span, ctrlc)?; + } + } + Ok(None) + } + ByteStreamSource::Child(mut child) => { + match (child.stdout.take(), child.stderr.take()) { + (Some(out), Some(err)) => { + // To avoid deadlocks, we must spawn a separate thread to wait on stderr. 
+ thread::scope(|s| { + let err_thread = thread::Builder::new() + .name("stderr writer".into()) + .spawn_scoped(s, || match err { + ChildPipe::Pipe(pipe) => { + write_to_out_dest(pipe, stderr, false, span, ctrlc) + } + ChildPipe::Tee(tee) => { + write_to_out_dest(tee, stderr, false, span, ctrlc) + } + }) + .err_span(span); + + match out { + ChildPipe::Pipe(pipe) => { + write_to_out_dest(pipe, stdout, true, span, ctrlc) + } + ChildPipe::Tee(tee) => { + write_to_out_dest(tee, stdout, true, span, ctrlc) + } + }?; + + if let Ok(result) = err_thread?.join() { + result?; + } else { + // thread panicked, which should not happen + debug_assert!(false) + } + + Ok::<_, ShellError>(()) + })?; + } + (Some(out), None) => { + // single output stream, we can consume directly + write_to_out_dest(out, stdout, true, span, ctrlc)?; + } + (None, Some(err)) => { + // single output stream, we can consume directly + write_to_out_dest(err, stderr, false, span, ctrlc)?; + } + (None, None) => {} + } + Ok(Some(child.wait()?)) + } + } + } +} + +impl Debug for ByteStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ByteStream").finish() + } +} + +impl From for PipelineData { + fn from(stream: ByteStream) -> Self { + Self::ByteStream(stream, None) + } +} + +struct ReadIterator +where + I: Iterator, + I::Item: AsRef<[u8]>, +{ + iter: I, + cursor: Option>, +} + +impl Read for ReadIterator +where + I: Iterator, + I::Item: AsRef<[u8]>, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + while let Some(cursor) = self.cursor.as_mut() { + let read = cursor.read(buf)?; + if read == 0 { + self.cursor = self.iter.next().map(Cursor::new); + } else { + return Ok(read); + } + } + Ok(0) + } +} + +struct ReadResultIterator +where + I: Iterator>, + T: AsRef<[u8]>, +{ + iter: I, + cursor: Option>, +} + +impl Read for ReadResultIterator +where + I: Iterator>, + T: AsRef<[u8]>, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + while let Some(cursor) = self.cursor.as_mut() { + let read = cursor.read(buf)?; + if read == 0 { + self.cursor = self.iter.next().transpose()?.map(Cursor::new); + } else { + return Ok(read); + } + } + Ok(0) + } +} + +pub struct Reader { + reader: BufReader, + span: Span, + ctrlc: Option>, +} + +impl Reader { + pub fn span(&self) -> Span { + self.span + } +} + +impl Read for Reader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { + Err(ShellError::InterruptedByUser { + span: Some(self.span), + } + .into()) + } else { + self.reader.read(buf) + } + } +} + +impl BufRead for Reader { + fn fill_buf(&mut self) -> io::Result<&[u8]> { + self.reader.fill_buf() + } + + fn consume(&mut self, amt: usize) { + self.reader.consume(amt) + } +} + +pub struct Lines { + reader: BufReader, + span: Span, + ctrlc: Option>, +} + +impl Lines { + pub fn span(&self) -> Span { + self.span + } +} + +impl Iterator for Lines { + type Item = Result; + + fn next(&mut self) -> Option { + if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { + None + } else { + let mut buf = Vec::new(); + match self.reader.read_until(b'\n', &mut buf) { + Ok(0) => None, + Ok(_) => { + let Ok(mut string) = String::from_utf8(buf) else { + return Some(Err(ShellError::NonUtf8 { span: self.span })); + }; + trim_end_newline(&mut string); + Some(Ok(string)) + } + Err(e) => Some(Err(e.into_spanned(self.span).into())), + } + } + } +} + +pub struct Chunks { + reader: BufReader, + span: Span, + ctrlc: Option>, + leftover: Vec, +} + +impl Chunks { + pub fn span(&self) -> 
Span { + self.span + } +} + +impl Iterator for Chunks { + type Item = Result; + + fn next(&mut self) -> Option { + if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { + None + } else { + match self.reader.fill_buf() { + Ok(buf) => { + self.leftover.extend_from_slice(buf); + let len = buf.len(); + self.reader.consume(len); + } + Err(err) => return Some(Err(err.into_spanned(self.span).into())), + }; + + if self.leftover.is_empty() { + return None; + } + + match String::from_utf8(std::mem::take(&mut self.leftover)) { + Ok(str) => Some(Ok(Value::string(str, self.span))), + Err(err) => { + if err.utf8_error().error_len().is_some() { + Some(Ok(Value::binary(err.into_bytes(), self.span))) + } else { + let i = err.utf8_error().valid_up_to(); + let mut bytes = err.into_bytes(); + self.leftover = bytes.split_off(i); + let str = String::from_utf8(bytes).expect("valid utf8"); + Some(Ok(Value::string(str, self.span))) + } + } + } + } + } +} + +fn trim_end_newline(string: &mut String) { + if string.ends_with('\n') { + string.pop(); + if string.ends_with('\r') { + string.pop(); + } + } +} + +fn write_to_out_dest( + mut read: impl Read, + stream: &OutDest, + stdout: bool, + span: Span, + ctrlc: Option<&AtomicBool>, +) -> Result<(), ShellError> { + match stream { + OutDest::Pipe | OutDest::Capture => return Ok(()), + OutDest::Null => copy_with_interrupt(&mut read, &mut io::sink(), span, ctrlc), + OutDest::Inherit if stdout => { + copy_with_interrupt(&mut read, &mut io::stdout(), span, ctrlc) + } + OutDest::Inherit => copy_with_interrupt(&mut read, &mut io::stderr(), span, ctrlc), + OutDest::File(file) => copy_with_interrupt(&mut read, &mut file.as_ref(), span, ctrlc), + }?; + Ok(()) +} + +#[cfg(unix)] +pub(crate) fn convert_file>(file: impl Into) -> T { + file.into().into() +} + +#[cfg(windows)] +pub(crate) fn convert_file>(file: impl Into) -> T { + file.into().into() +} + +const DEFAULT_BUF_SIZE: usize = 8192; + +pub fn copy_with_interrupt( + reader: &mut R, + writer: &mut W, + span: Span, + interrupt: Option<&AtomicBool>, +) -> Result +where + R: Read, + W: Write, +{ + if let Some(interrupt) = interrupt { + // #[cfg(any(target_os = "linux", target_os = "android"))] + // { + // return crate::sys::kernel_copy::copy_spec(reader, writer); + // } + match generic_copy(reader, writer, span, interrupt) { + Ok(len) => { + writer.flush().err_span(span)?; + Ok(len) + } + Err(err) => { + let _ = writer.flush(); + Err(err) + } + } + } else { + match io::copy(reader, writer) { + Ok(n) => { + writer.flush().err_span(span)?; + Ok(n) + } + Err(err) => { + let _ = writer.flush(); + Err(err.into_spanned(span).into()) + } + } + } +} + +// Copied from [`std::io::copy`] +fn generic_copy( + reader: &mut R, + writer: &mut W, + span: Span, + interrupt: &AtomicBool, +) -> Result +where + R: Read, + W: Write, +{ + let buf = &mut [0; DEFAULT_BUF_SIZE]; + let mut len = 0; + loop { + if interrupt.load(Ordering::Relaxed) { + return Err(ShellError::InterruptedByUser { span: Some(span) }); + } + let n = match reader.read(buf) { + Ok(0) => break, + Ok(n) => n, + Err(e) if e.kind() == ErrorKind::Interrupted => continue, + Err(e) => return Err(e.into_spanned(span).into()), + }; + len += n; + writer.write_all(&buf[..n]).err_span(span)?; + } + Ok(len as u64) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_chunks(data: Vec) -> Chunks + where + T: AsRef<[u8]> + Default + Send + 'static, + { + let reader = ReadIterator { + iter: data.into_iter(), + cursor: Some(Cursor::new(T::default())), + }; + Chunks { + reader: 
BufReader::new(SourceReader::Read(Box::new(reader))), + span: Span::test_data(), + ctrlc: None, + leftover: Vec::new(), + } + } + + #[test] + fn chunks_read_string() { + let data = vec!["Nushell", "が好きです"]; + let chunks = test_chunks(data.clone()); + let actual = chunks.collect::, _>>().unwrap(); + let expected = data.into_iter().map(Value::test_string).collect::>(); + assert_eq!(expected, actual); + } + + #[test] + fn chunks_read_string_split_utf8() { + let expected = "Nushell最高!"; + let chunks = test_chunks(vec![&b"Nushell\xe6"[..], b"\x9c\x80\xe9", b"\xab\x98!"]); + + let actual = chunks + .into_iter() + .map(|value| value.and_then(Value::into_string)) + .collect::>() + .unwrap(); + + assert_eq!(expected, actual); + } + + #[test] + fn chunks_returns_string_or_binary() { + let chunks = test_chunks(vec![b"Nushell".as_slice(), b"\x9c\x80\xe9abcd", b"efgh"]); + let actual = chunks.collect::, _>>().unwrap(); + let expected = vec![ + Value::test_string("Nushell"), + Value::test_binary(b"\x9c\x80\xe9abcd"), + Value::test_string("efgh"), + ]; + assert_eq!(actual, expected) + } +} diff --git a/crates/nu-protocol/src/pipeline_data/list_stream.rs b/crates/nu-protocol/src/pipeline/list_stream.rs similarity index 100% rename from crates/nu-protocol/src/pipeline_data/list_stream.rs rename to crates/nu-protocol/src/pipeline/list_stream.rs diff --git a/crates/nu-protocol/src/pipeline_data/metadata.rs b/crates/nu-protocol/src/pipeline/metadata.rs similarity index 100% rename from crates/nu-protocol/src/pipeline_data/metadata.rs rename to crates/nu-protocol/src/pipeline/metadata.rs diff --git a/crates/nu-protocol/src/pipeline/mod.rs b/crates/nu-protocol/src/pipeline/mod.rs new file mode 100644 index 0000000000..a018a084ed --- /dev/null +++ b/crates/nu-protocol/src/pipeline/mod.rs @@ -0,0 +1,11 @@ +pub mod byte_stream; +pub mod list_stream; +mod metadata; +mod out_dest; +mod pipeline_data; + +pub use byte_stream::*; +pub use list_stream::*; +pub use metadata::*; +pub use out_dest::*; +pub use pipeline_data::*; diff --git a/crates/nu-protocol/src/pipeline_data/out_dest.rs b/crates/nu-protocol/src/pipeline/out_dest.rs similarity index 81% rename from crates/nu-protocol/src/pipeline_data/out_dest.rs rename to crates/nu-protocol/src/pipeline/out_dest.rs index 976123e883..69955e6b0b 100644 --- a/crates/nu-protocol/src/pipeline_data/out_dest.rs +++ b/crates/nu-protocol/src/pipeline/out_dest.rs @@ -5,17 +5,17 @@ use std::{fs::File, io, process::Stdio, sync::Arc}; pub enum OutDest { /// Redirect the stdout and/or stderr of one command as the input for the next command in the pipeline. /// - /// The output pipe will be available as the `stdout` of `PipelineData::ExternalStream`. + /// The output pipe will be available as the `stdout` of [`ChildProcess`](crate::process::ChildProcess). /// /// If stdout and stderr are both set to `Pipe`, - /// then they will combined into the `stdout` of `PipelineData::ExternalStream`. + /// then they will combined into the `stdout` of [`ChildProcess`](crate::process::ChildProcess). Pipe, /// Capture output to later be collected into a [`Value`](crate::Value), `Vec`, or used in some other way. /// - /// The output stream(s) will be available in the `stdout` or `stderr` of `PipelineData::ExternalStream`. + /// The output stream(s) will be available in the `stdout` or `stderr` of [`ChildProcess`](crate::process::ChildProcess). 
/// /// This is similar to `Pipe` but will never combine stdout and stderr - /// or place an external command's stderr into `stdout` of `PipelineData::ExternalStream`. + /// or place an external command's stderr into `stdout` of [`ChildProcess`](crate::process::ChildProcess). Capture, /// Ignore output. /// diff --git a/crates/nu-protocol/src/pipeline/pipeline_data.rs b/crates/nu-protocol/src/pipeline/pipeline_data.rs new file mode 100644 index 0000000000..d7e58e63a3 --- /dev/null +++ b/crates/nu-protocol/src/pipeline/pipeline_data.rs @@ -0,0 +1,725 @@ +use crate::{ + ast::{Call, PathMember}, + engine::{EngineState, Stack}, + process::{ChildPipe, ChildProcess, ExitStatus}, + ByteStream, Config, ErrSpan, ListStream, OutDest, PipelineMetadata, Range, ShellError, Span, + Value, +}; +use nu_utils::{stderr_write_all_and_flush, stdout_write_all_and_flush}; +use std::{ + io::{Cursor, Read, Write}, + sync::{atomic::AtomicBool, Arc}, +}; + +const LINE_ENDING_PATTERN: &[char] = &['\r', '\n']; + +/// The foundational abstraction for input and output to commands +/// +/// This represents either a single Value or a stream of values coming into the command or leaving a command. +/// +/// A note on implementation: +/// +/// We've tried a few variations of this structure. Listing these below so we have a record. +/// +/// * We tried always assuming a stream in Nushell. This was a great 80% solution, but it had some rough edges. +/// Namely, how do you know the difference between a single string and a list of one string. How do you know +/// when to flatten the data given to you from a data source into the stream or to keep it as an unflattened +/// list? +/// +/// * We tried putting the stream into Value. This had some interesting properties as now commands "just worked +/// on values", but lead to a few unfortunate issues. +/// +/// The first is that you can't easily clone Values in a way that felt largely immutable. For example, if +/// you cloned a Value which contained a stream, and in one variable drained some part of it, then the second +/// variable would see different values based on what you did to the first. +/// +/// To make this kind of mutation thread-safe, we would have had to produce a lock for the stream, which in +/// practice would have meant always locking the stream before reading from it. But more fundamentally, it +/// felt wrong in practice that observation of a value at runtime could affect other values which happen to +/// alias the same stream. By separating these, we don't have this effect. Instead, variables could get +/// concrete list values rather than streams, and be able to view them without non-local effects. +/// +/// * A balance of the two approaches is what we've landed on: Values are thread-safe to pass, and we can stream +/// them into any sources. Streams are still available to model the infinite streams approach of original +/// Nushell. +#[derive(Debug)] +pub enum PipelineData { + Empty, + Value(Value, Option), + ListStream(ListStream, Option), + ByteStream(ByteStream, Option), +} + +impl PipelineData { + pub fn empty() -> PipelineData { + PipelineData::Empty + } + + /// create a `PipelineData::ByteStream` with proper exit_code + /// + /// It's useful to break running without raising error at user level. 
+ pub fn new_external_stream_with_only_exit_code(exit_code: i32) -> PipelineData { + let span = Span::unknown(); + let mut child = ChildProcess::from_raw(None, None, None, span); + child.set_exit_code(exit_code); + PipelineData::ByteStream(ByteStream::child(child, span), None) + } + + pub fn metadata(&self) -> Option { + match self { + PipelineData::Empty => None, + PipelineData::Value(_, meta) + | PipelineData::ListStream(_, meta) + | PipelineData::ByteStream(_, meta) => meta.clone(), + } + } + + pub fn set_metadata(mut self, metadata: Option) -> Self { + match &mut self { + PipelineData::Empty => {} + PipelineData::Value(_, meta) + | PipelineData::ListStream(_, meta) + | PipelineData::ByteStream(_, meta) => *meta = metadata, + } + self + } + + pub fn is_nothing(&self) -> bool { + matches!(self, PipelineData::Value(Value::Nothing { .. }, ..)) + || matches!(self, PipelineData::Empty) + } + + /// PipelineData doesn't always have a Span, but we can try! + pub fn span(&self) -> Option { + match self { + PipelineData::Empty => None, + PipelineData::Value(value, ..) => Some(value.span()), + PipelineData::ListStream(stream, ..) => Some(stream.span()), + PipelineData::ByteStream(stream, ..) => Some(stream.span()), + } + } + + pub fn into_value(self, span: Span) -> Result { + match self { + PipelineData::Empty => Ok(Value::nothing(span)), + PipelineData::Value(value, ..) => Ok(value.with_span(span)), + PipelineData::ListStream(stream, ..) => Ok(stream.into_value()), + PipelineData::ByteStream(stream, ..) => stream.into_value(), + } + } + + /// Writes all values or redirects all output to the current [`OutDest`]s in `stack`. + /// + /// For [`OutDest::Pipe`] and [`OutDest::Capture`], this will return the `PipelineData` as is + /// without consuming input and without writing anything. + /// + /// For the other [`OutDest`]s, the given `PipelineData` will be completely consumed + /// and `PipelineData::Empty` will be returned. + pub fn write_to_out_dests( + self, + engine_state: &EngineState, + stack: &mut Stack, + ) -> Result { + match (self, stack.stdout()) { + (PipelineData::ByteStream(stream, ..), stdout) => { + stream.write_to_out_dests(stdout, stack.stderr())?; + } + (data, OutDest::Pipe | OutDest::Capture) => return Ok(data), + (PipelineData::Empty, ..) => {} + (PipelineData::Value(..), OutDest::Null) => {} + (PipelineData::ListStream(stream, ..), OutDest::Null) => { + // we need to drain the stream in case there are external commands in the pipeline + stream.drain()?; + } + (PipelineData::Value(value, ..), OutDest::File(file)) => { + let bytes = value_to_bytes(value)?; + let mut file = file.as_ref(); + file.write_all(&bytes)?; + file.flush()?; + } + (PipelineData::ListStream(stream, ..), OutDest::File(file)) => { + let mut file = file.as_ref(); + // use BufWriter here? + for value in stream { + let bytes = value_to_bytes(value)?; + file.write_all(&bytes)?; + file.write_all(b"\n")?; + } + file.flush()?; + } + (data @ (PipelineData::Value(..) | PipelineData::ListStream(..)), OutDest::Inherit) => { + data.print(engine_state, stack, false, false)?; + } + } + Ok(PipelineData::Empty) + } + + pub fn drain(self) -> Result, ShellError> { + match self { + PipelineData::Empty => Ok(None), + PipelineData::Value(Value::Error { error, .. }, ..) => Err(*error), + PipelineData::Value(..) => Ok(None), + PipelineData::ListStream(stream, ..) => { + stream.drain()?; + Ok(None) + } + PipelineData::ByteStream(stream, ..) 
=> stream.drain(), + } + } + + /// Try convert from self into iterator + /// + /// It returns Err if the `self` cannot be converted to an iterator. + pub fn into_iter_strict(self, span: Span) -> Result { + Ok(PipelineIterator(match self { + PipelineData::Value(value, ..) => { + let val_span = value.span(); + match value { + Value::List { vals, .. } => PipelineIteratorInner::ListStream( + ListStream::new(vals.into_iter(), val_span, None).into_iter(), + ), + Value::Binary { val, .. } => PipelineIteratorInner::ListStream( + ListStream::new( + val.into_iter().map(move |x| Value::int(x as i64, val_span)), + val_span, + None, + ) + .into_iter(), + ), + Value::Range { val, .. } => PipelineIteratorInner::ListStream( + ListStream::new(val.into_range_iter(val_span, None), val_span, None) + .into_iter(), + ), + // Propagate errors by explicitly matching them before the final case. + Value::Error { error, .. } => return Err(*error), + other => { + return Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary, range, or byte stream".into(), + wrong_type: other.get_type().to_string(), + dst_span: span, + src_span: val_span, + }) + } + } + } + PipelineData::ListStream(stream, ..) => { + PipelineIteratorInner::ListStream(stream.into_iter()) + } + PipelineData::Empty => { + return Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary, range, or byte stream".into(), + wrong_type: "null".into(), + dst_span: span, + src_span: span, + }) + } + PipelineData::ByteStream(stream, ..) => { + if let Some(chunks) = stream.chunks() { + PipelineIteratorInner::ByteStream(chunks) + } else { + PipelineIteratorInner::Empty + } + } + })) + } + + pub fn collect_string(self, separator: &str, config: &Config) -> Result { + match self { + PipelineData::Empty => Ok(String::new()), + PipelineData::Value(value, ..) => Ok(value.to_expanded_string(separator, config)), + PipelineData::ListStream(stream, ..) => Ok(stream.into_string(separator, config)), + PipelineData::ByteStream(stream, ..) => stream.into_string(), + } + } + + /// Retrieves string from pipeline data. + /// + /// As opposed to `collect_string` this raises error rather than converting non-string values. + /// The `span` will be used if `ListStream` is encountered since it doesn't carry a span. + pub fn collect_string_strict( + self, + span: Span, + ) -> Result<(String, Span, Option), ShellError> { + match self { + PipelineData::Empty => Ok((String::new(), span, None)), + PipelineData::Value(Value::String { val, .. }, metadata) => Ok((val, span, metadata)), + PipelineData::Value(val, ..) => Err(ShellError::TypeMismatch { + err_message: "string".into(), + span: val.span(), + }), + PipelineData::ListStream(..) => Err(ShellError::TypeMismatch { + err_message: "string".into(), + span, + }), + PipelineData::ByteStream(stream, metadata) => { + let span = stream.span(); + Ok((stream.into_string()?, span, metadata)) + } + } + } + + pub fn follow_cell_path( + self, + cell_path: &[PathMember], + head: Span, + insensitive: bool, + ) -> Result { + match self { + // FIXME: there are probably better ways of doing this + PipelineData::ListStream(stream, ..) => Value::list(stream.into_iter().collect(), head) + .follow_cell_path(cell_path, insensitive), + PipelineData::Value(v, ..) => v.follow_cell_path(cell_path, insensitive), + PipelineData::Empty => Err(ShellError::IncompatiblePathAccess { + type_name: "empty pipeline".to_string(), + span: head, + }), + PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::IncompatiblePathAccess { + type_name: "byte stream".to_string(), + span: stream.span(), + }), + } + } + + /// Simplified mapper to help with simple values also. For full iterator support use `.into_iter()` instead + pub fn map( + self, + mut f: F, + ctrlc: Option>, + ) -> Result + where + Self: Sized, + F: FnMut(Value) -> Value + 'static + Send, + { + match self { + PipelineData::Value(value, ..) => { + let span = value.span(); + match value { + Value::List { vals, .. } => { + Ok(vals.into_iter().map(f).into_pipeline_data(span, ctrlc)) + } + Value::Range { val, .. } => Ok(val + .into_range_iter(span, ctrlc.clone()) + .map(f) + .into_pipeline_data(span, ctrlc)), + value => match f(value) { + Value::Error { error, .. } => Err(*error), + v => Ok(v.into_pipeline_data()), + }, + } + } + PipelineData::Empty => Ok(PipelineData::Empty), + PipelineData::ListStream(stream, ..) => { + Ok(PipelineData::ListStream(stream.map(f), None)) + } + PipelineData::ByteStream(stream, ..) => { + // TODO: is this behavior desired / correct ? + let span = stream.span(); + match String::from_utf8(stream.into_bytes()?) { + Ok(mut str) => { + str.truncate(str.trim_end_matches(LINE_ENDING_PATTERN).len()); + Ok(f(Value::string(str, span)).into_pipeline_data()) + } + Err(err) => Ok(f(Value::binary(err.into_bytes(), span)).into_pipeline_data()), + } + } + } + } + + /// Simplified flatmapper. For full iterator support use `.into_iter()` instead + pub fn flat_map( + self, + mut f: F, + ctrlc: Option>, + ) -> Result + where + Self: Sized, + U: IntoIterator + 'static, + ::IntoIter: 'static + Send, + F: FnMut(Value) -> U + 'static + Send, + { + match self { + PipelineData::Empty => Ok(PipelineData::Empty), + PipelineData::Value(value, ..) => { + let span = value.span(); + match value { + Value::List { vals, .. } => { + Ok(vals.into_iter().flat_map(f).into_pipeline_data(span, ctrlc)) + } + Value::Range { val, .. } => Ok(val + .into_range_iter(span, ctrlc.clone()) + .flat_map(f) + .into_pipeline_data(span, ctrlc)), + value => Ok(f(value).into_iter().into_pipeline_data(span, ctrlc)), + } + } + PipelineData::ListStream(stream, ..) => { + Ok(stream.modify(|iter| iter.flat_map(f)).into()) + } + PipelineData::ByteStream(stream, ..) => { + // TODO: is this behavior desired / correct ? + let span = stream.span(); + match String::from_utf8(stream.into_bytes()?) { + Ok(mut str) => { + str.truncate(str.trim_end_matches(LINE_ENDING_PATTERN).len()); + Ok(f(Value::string(str, span)) + .into_iter() + .into_pipeline_data(span, ctrlc)) + } + Err(err) => Ok(f(Value::binary(err.into_bytes(), span)) + .into_iter() + .into_pipeline_data(span, ctrlc)), + } + } + } + } + + pub fn filter( + self, + mut f: F, + ctrlc: Option>, + ) -> Result + where + Self: Sized, + F: FnMut(&Value) -> bool + 'static + Send, + { + match self { + PipelineData::Empty => Ok(PipelineData::Empty), + PipelineData::Value(value, ..) => { + let span = value.span(); + match value { + Value::List { vals, .. } => { + Ok(vals.into_iter().filter(f).into_pipeline_data(span, ctrlc)) + } + Value::Range { val, .. } => Ok(val + .into_range_iter(span, ctrlc.clone()) + .filter(f) + .into_pipeline_data(span, ctrlc)), + value => { + if f(&value) { + Ok(value.into_pipeline_data()) + } else { + Ok(Value::nothing(span).into_pipeline_data()) + } + } + } + } + PipelineData::ListStream(stream, ..) => Ok(stream.modify(|iter| iter.filter(f)).into()), + PipelineData::ByteStream(stream, ..) => { + // TODO: is this behavior desired / correct ? 
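// Same approach as in `map` and `flat_map` above: the byte stream is collected in
// full, decoded into a single String value when it is valid UTF-8 (with trailing
// '\r'/'\n' trimmed), otherwise kept as a single Binary value, and the predicate is
// then applied to that one value.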
+ let span = stream.span(); + let value = match String::from_utf8(stream.into_bytes()?) { + Ok(mut str) => { + str.truncate(str.trim_end_matches(LINE_ENDING_PATTERN).len()); + Value::string(str, span) + } + Err(err) => Value::binary(err.into_bytes(), span), + }; + if f(&value) { + Ok(value.into_pipeline_data()) + } else { + Ok(Value::nothing(span).into_pipeline_data()) + } + } + } + } + + /// Try to catch the external command exit status and detect if it failed. + /// + /// This is useful for external commands with semicolon, we can detect errors early to avoid + /// commands after the semicolon running. + /// + /// Returns `self` and a flag that indicates if the external command run failed. If `self` is + /// not [`PipelineData::ByteStream`], the flag will be `false`. + /// + /// Currently this will consume an external command to completion. + pub fn check_external_failed(self) -> Result<(Self, bool), ShellError> { + if let PipelineData::ByteStream(stream, metadata) = self { + let span = stream.span(); + match stream.into_child() { + Ok(mut child) => { + // Only check children without stdout. This means that nothing + // later in the pipeline can possibly consume output from this external command. + if child.stdout.is_none() { + // Note: + // In run-external's implementation detail, the result sender thread + // send out stderr message first, then stdout message, then exit_code. + // + // In this clause, we already make sure that `stdout` is None + // But not the case of `stderr`, so if `stderr` is not None + // We need to consume stderr message before reading external commands' exit code. + // + // Or we'll never have a chance to read exit_code if stderr producer produce too much stderr message. + // So we consume stderr stream and rebuild it. + let stderr = child + .stderr + .take() + .map(|mut stderr| { + let mut buf = Vec::new(); + stderr.read_to_end(&mut buf).err_span(span)?; + Ok::<_, ShellError>(buf) + }) + .transpose()?; + + let code = child.wait()?.code(); + let mut child = ChildProcess::from_raw(None, None, None, span); + if let Some(stderr) = stderr { + child.stderr = Some(ChildPipe::Tee(Box::new(Cursor::new(stderr)))); + } + child.set_exit_code(code); + let stream = ByteStream::child(child, span); + Ok((PipelineData::ByteStream(stream, metadata), code != 0)) + } else { + let stream = ByteStream::child(child, span); + Ok((PipelineData::ByteStream(stream, metadata), false)) + } + } + Err(stream) => Ok((PipelineData::ByteStream(stream, metadata), false)), + } + } else { + Ok((self, false)) + } + } + + /// Try to convert Value from Value::Range to Value::List. + /// This is useful to expand Value::Range into array notation, specifically when + /// converting `to json` or `to nuon`. + /// `1..3 | to XX -> [1,2,3]` + pub fn try_expand_range(self) -> Result { + match self { + PipelineData::Value(v, metadata) => { + let span = v.span(); + match v { + Value::Range { val, .. 
} => { + match *val { + Range::IntRange(range) => { + if range.is_unbounded() { + return Err(ShellError::GenericError { + error: "Cannot create range".into(), + msg: "Unbounded ranges are not allowed when converting to this format".into(), + span: Some(span), + help: Some("Consider using ranges with valid start and end point.".into()), + inner: vec![], + }); + } + } + Range::FloatRange(range) => { + if range.is_unbounded() { + return Err(ShellError::GenericError { + error: "Cannot create range".into(), + msg: "Unbounded ranges are not allowed when converting to this format".into(), + span: Some(span), + help: Some("Consider using ranges with valid start and end point.".into()), + inner: vec![], + }); + } + } + } + let range_values: Vec = val.into_range_iter(span, None).collect(); + Ok(PipelineData::Value(Value::list(range_values, span), None)) + } + x => Ok(PipelineData::Value(x, metadata)), + } + } + _ => Ok(self), + } + } + + /// Consume and print self data immediately. + /// + /// `no_newline` controls if we need to attach newline character to output. + /// `to_stderr` controls if data is output to stderr, when the value is false, the data is output to stdout. + pub fn print( + self, + engine_state: &EngineState, + stack: &mut Stack, + no_newline: bool, + to_stderr: bool, + ) -> Result, ShellError> { + if let PipelineData::ByteStream(stream, ..) = self { + stream.print(to_stderr) + } else { + // If the table function is in the declarations, then we can use it + // to create the table value that will be printed in the terminal + if let Some(decl_id) = engine_state.table_decl_id { + let command = engine_state.get_decl(decl_id); + if command.get_block_id().is_some() { + self.write_all_and_flush(engine_state, no_newline, to_stderr)?; + } else { + let call = Call::new(Span::new(0, 0)); + let table = command.run(engine_state, stack, &call, self)?; + table.write_all_and_flush(engine_state, no_newline, to_stderr)?; + } + } else { + self.write_all_and_flush(engine_state, no_newline, to_stderr)?; + } + Ok(None) + } + } + + fn write_all_and_flush( + self, + engine_state: &EngineState, + no_newline: bool, + to_stderr: bool, + ) -> Result<(), ShellError> { + let config = engine_state.get_config(); + for item in self { + let mut out = if let Value::Error { error, .. } = item { + return Err(*error); + } else { + item.to_expanded_string("\n", config) + }; + + if !no_newline { + out.push('\n'); + } + + if to_stderr { + stderr_write_all_and_flush(out)? + } else { + stdout_write_all_and_flush(out)? + } + } + + Ok(()) + } +} + +enum PipelineIteratorInner { + Empty, + Value(Value), + ListStream(crate::list_stream::IntoIter), + ByteStream(crate::byte_stream::Chunks), +} + +pub struct PipelineIterator(PipelineIteratorInner); + +impl IntoIterator for PipelineData { + type Item = Value; + + type IntoIter = PipelineIterator; + + fn into_iter(self) -> Self::IntoIter { + PipelineIterator(match self { + PipelineData::Empty => PipelineIteratorInner::Empty, + PipelineData::Value(value, ..) => { + let span = value.span(); + match value { + Value::List { vals, .. } => PipelineIteratorInner::ListStream( + ListStream::new(vals.into_iter(), span, None).into_iter(), + ), + Value::Range { val, .. } => PipelineIteratorInner::ListStream( + ListStream::new(val.into_range_iter(span, None), span, None).into_iter(), + ), + x => PipelineIteratorInner::Value(x), + } + } + PipelineData::ListStream(stream, ..) => { + PipelineIteratorInner::ListStream(stream.into_iter()) + } + PipelineData::ByteStream(stream, ..) 
=> stream.chunks().map_or( + PipelineIteratorInner::Empty, + PipelineIteratorInner::ByteStream, + ), + }) + } +} + +impl Iterator for PipelineIterator { + type Item = Value; + + fn next(&mut self) -> Option { + match &mut self.0 { + PipelineIteratorInner::Empty => None, + PipelineIteratorInner::Value(Value::Nothing { .. }, ..) => None, + PipelineIteratorInner::Value(v, ..) => Some(std::mem::take(v)), + PipelineIteratorInner::ListStream(stream, ..) => stream.next(), + PipelineIteratorInner::ByteStream(stream) => stream.next().map(|x| match x { + Ok(x) => x, + Err(err) => Value::error( + err, + Span::unknown(), //FIXME: unclear where this span should come from + ), + }), + } + } +} + +pub trait IntoPipelineData { + fn into_pipeline_data(self) -> PipelineData; + + fn into_pipeline_data_with_metadata( + self, + metadata: impl Into>, + ) -> PipelineData; +} + +impl IntoPipelineData for V +where + V: Into, +{ + fn into_pipeline_data(self) -> PipelineData { + PipelineData::Value(self.into(), None) + } + + fn into_pipeline_data_with_metadata( + self, + metadata: impl Into>, + ) -> PipelineData { + PipelineData::Value(self.into(), metadata.into()) + } +} + +pub trait IntoInterruptiblePipelineData { + fn into_pipeline_data(self, span: Span, ctrlc: Option>) -> PipelineData; + fn into_pipeline_data_with_metadata( + self, + span: Span, + ctrlc: Option>, + metadata: impl Into>, + ) -> PipelineData; +} + +impl IntoInterruptiblePipelineData for I +where + I: IntoIterator + Send + 'static, + I::IntoIter: Send + 'static, + ::Item: Into, +{ + fn into_pipeline_data(self, span: Span, ctrlc: Option>) -> PipelineData { + ListStream::new(self.into_iter().map(Into::into), span, ctrlc).into() + } + + fn into_pipeline_data_with_metadata( + self, + span: Span, + ctrlc: Option>, + metadata: impl Into>, + ) -> PipelineData { + PipelineData::ListStream( + ListStream::new(self.into_iter().map(Into::into), span, ctrlc), + metadata.into(), + ) + } +} + +fn value_to_bytes(value: Value) -> Result, ShellError> { + let bytes = match value { + Value::String { val, .. } => val.into_bytes(), + Value::Binary { val, .. } => val, + Value::List { vals, .. } => { + let val = vals + .into_iter() + .map(Value::coerce_into_string) + .collect::, ShellError>>()? + .join("\n") + + "\n"; + + val.into_bytes() + } + // Propagate errors by explicitly matching them before the final case. + Value::Error { error, .. } => return Err(*error), + value => value.coerce_into_string()?.into_bytes(), + }; + Ok(bytes) +} diff --git a/crates/nu-protocol/src/pipeline_data/mod.rs b/crates/nu-protocol/src/pipeline_data/mod.rs deleted file mode 100644 index 297eb19c55..0000000000 --- a/crates/nu-protocol/src/pipeline_data/mod.rs +++ /dev/null @@ -1,1185 +0,0 @@ -pub mod list_stream; -mod metadata; -mod out_dest; -mod raw_stream; - -pub use list_stream::{ListStream, ValueIterator}; -pub use metadata::*; -pub use out_dest::*; -pub use raw_stream::*; - -use crate::{ - ast::{Call, PathMember}, - engine::{EngineState, Stack, StateWorkingSet}, - format_error, Config, Range, ShellError, Span, Value, -}; -use nu_utils::{stderr_write_all_and_flush, stdout_write_all_and_flush}; -use std::{ - io::{self, Cursor, Read, Write}, - sync::{atomic::AtomicBool, Arc}, - thread, -}; - -const LINE_ENDING_PATTERN: &[char] = &['\r', '\n']; - -/// The foundational abstraction for input and output to commands -/// -/// This represents either a single Value or a stream of values coming into the command or leaving a command. 
-/// -/// A note on implementation: -/// -/// We've tried a few variations of this structure. Listing these below so we have a record. -/// -/// * We tried always assuming a stream in Nushell. This was a great 80% solution, but it had some rough edges. -/// Namely, how do you know the difference between a single string and a list of one string. How do you know -/// when to flatten the data given to you from a data source into the stream or to keep it as an unflattened -/// list? -/// -/// * We tried putting the stream into Value. This had some interesting properties as now commands "just worked -/// on values", but lead to a few unfortunate issues. -/// -/// The first is that you can't easily clone Values in a way that felt largely immutable. For example, if -/// you cloned a Value which contained a stream, and in one variable drained some part of it, then the second -/// variable would see different values based on what you did to the first. -/// -/// To make this kind of mutation thread-safe, we would have had to produce a lock for the stream, which in -/// practice would have meant always locking the stream before reading from it. But more fundamentally, it -/// felt wrong in practice that observation of a value at runtime could affect other values which happen to -/// alias the same stream. By separating these, we don't have this effect. Instead, variables could get -/// concrete list values rather than streams, and be able to view them without non-local effects. -/// -/// * A balance of the two approaches is what we've landed on: Values are thread-safe to pass, and we can stream -/// them into any sources. Streams are still available to model the infinite streams approach of original -/// Nushell. -#[derive(Debug)] -pub enum PipelineData { - Value(Value, Option), - ListStream(ListStream, Option), - ExternalStream { - stdout: Option, - stderr: Option, - exit_code: Option, - span: Span, - metadata: Option, - trim_end_newline: bool, - }, - Empty, -} - -impl PipelineData { - pub fn new_with_metadata(metadata: Option, span: Span) -> PipelineData { - PipelineData::Value(Value::nothing(span), metadata) - } - - /// create a `PipelineData::ExternalStream` with proper exit_code - /// - /// It's useful to break running without raising error at user level. - pub fn new_external_stream_with_only_exit_code(exit_code: i64) -> PipelineData { - PipelineData::ExternalStream { - stdout: None, - stderr: None, - exit_code: Some(ListStream::new( - [Value::int(exit_code, Span::unknown())].into_iter(), - Span::unknown(), - None, - )), - span: Span::unknown(), - metadata: None, - trim_end_newline: false, - } - } - - pub fn empty() -> PipelineData { - PipelineData::Empty - } - - pub fn metadata(&self) -> Option { - match self { - PipelineData::ListStream(_, x) => x.clone(), - PipelineData::ExternalStream { metadata: x, .. } => x.clone(), - PipelineData::Value(_, x) => x.clone(), - PipelineData::Empty => None, - } - } - - pub fn set_metadata(mut self, metadata: Option) -> Self { - match &mut self { - PipelineData::ListStream(_, x) => *x = metadata, - PipelineData::ExternalStream { metadata: x, .. } => *x = metadata, - PipelineData::Value(_, x) => *x = metadata, - PipelineData::Empty => {} - } - - self - } - - pub fn is_nothing(&self) -> bool { - matches!(self, PipelineData::Value(Value::Nothing { .. }, ..)) - || matches!(self, PipelineData::Empty) - } - - /// PipelineData doesn't always have a Span, but we can try! - pub fn span(&self) -> Option { - match self { - PipelineData::ListStream(stream, ..) 
=> Some(stream.span()), - PipelineData::ExternalStream { span, .. } => Some(*span), - PipelineData::Value(v, _) => Some(v.span()), - PipelineData::Empty => None, - } - } - - pub fn into_value(self, span: Span) -> Value { - match self { - PipelineData::Empty => Value::nothing(span), - PipelineData::Value(Value::Nothing { .. }, ..) => Value::nothing(span), - PipelineData::Value(v, ..) => v.with_span(span), - PipelineData::ListStream(s, ..) => Value::list( - s.into_iter().collect(), - span, // FIXME? - ), - PipelineData::ExternalStream { - stdout: None, - exit_code, - .. - } => { - // Make sure everything has finished - if let Some(exit_code) = exit_code { - let _: Vec<_> = exit_code.into_iter().collect(); - } - Value::nothing(span) - } - PipelineData::ExternalStream { - stdout: Some(mut s), - exit_code, - trim_end_newline, - .. - } => { - let mut items = vec![]; - - for val in &mut s { - match val { - Ok(val) => { - items.push(val); - } - Err(e) => { - return Value::error(e, span); - } - } - } - - // Make sure everything has finished - if let Some(exit_code) = exit_code { - let _: Vec<_> = exit_code.into_iter().collect(); - } - - // NOTE: currently trim-end-newline only handles for string output. - // For binary, user might need origin data. - if s.is_binary { - let mut output = vec![]; - for item in items { - match item.coerce_into_binary() { - Ok(item) => { - output.extend(item); - } - Err(err) => { - return Value::error(err, span); - } - } - } - - Value::binary( - output, span, // FIXME? - ) - } else { - let mut output = String::new(); - for item in items { - match item.coerce_into_string() { - Ok(s) => output.push_str(&s), - Err(err) => { - return Value::error(err, span); - } - } - } - if trim_end_newline { - output.truncate(output.trim_end_matches(LINE_ENDING_PATTERN).len()) - } - Value::string( - output, span, // FIXME? - ) - } - } - } - } - - /// Writes all values or redirects all output to the current [`OutDest`]s in `stack`. - /// - /// For [`OutDest::Pipe`] and [`OutDest::Capture`], this will return the `PipelineData` as is - /// without consuming input and without writing anything. - /// - /// For the other [`OutDest`]s, the given `PipelineData` will be completely consumed - /// and `PipelineData::Empty` will be returned. - pub fn write_to_out_dests( - self, - engine_state: &EngineState, - stack: &mut Stack, - ) -> Result { - match (self, stack.stdout()) { - ( - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - }, - _, - ) => { - fn needs_redirect( - stream: Option, - out_dest: &OutDest, - ) -> Result> { - match (stream, out_dest) { - (Some(stream), OutDest::Pipe | OutDest::Capture) => Err(Some(stream)), - (Some(stream), _) => Ok(stream), - (None, _) => Err(None), - } - } - - let (stdout, stderr) = match ( - needs_redirect(stdout, stack.stdout()), - needs_redirect(stderr, stack.stderr()), - ) { - (Ok(stdout), Ok(stderr)) => { - // We need to redirect both stdout and stderr - - // To avoid deadlocks, we must spawn a separate thread to wait on stderr. - let err_thread = { - let err = stack.stderr().clone(); - std::thread::Builder::new() - .spawn(move || consume_child_output(stderr, &err)) - }; - - consume_child_output(stdout, stack.stdout())?; - - match err_thread?.join() { - Ok(result) => result?, - Err(err) => { - return Err(ShellError::GenericError { - error: "Error consuming external command stderr".into(), - msg: format! 
{"{err:?}"}, - span: Some(span), - help: None, - inner: Vec::new(), - }) - } - } - - (None, None) - } - (Ok(stdout), Err(stderr)) => { - // single output stream, we can consume directly - consume_child_output(stdout, stack.stdout())?; - (None, stderr) - } - (Err(stdout), Ok(stderr)) => { - // single output stream, we can consume directly - consume_child_output(stderr, stack.stderr())?; - (stdout, None) - } - (Err(stdout), Err(stderr)) => (stdout, stderr), - }; - - Ok(PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - span, - metadata, - trim_end_newline, - }) - } - (data, OutDest::Pipe | OutDest::Capture) => Ok(data), - (PipelineData::Empty, _) => Ok(PipelineData::Empty), - (PipelineData::Value(_, _), OutDest::Null) => Ok(PipelineData::Empty), - (PipelineData::ListStream(stream, _), OutDest::Null) => { - // we need to drain the stream in case there are external commands in the pipeline - stream.drain()?; - Ok(PipelineData::Empty) - } - (PipelineData::Value(value, _), OutDest::File(file)) => { - let bytes = value_to_bytes(value)?; - let mut file = file.try_clone()?; - file.write_all(&bytes)?; - file.flush()?; - Ok(PipelineData::Empty) - } - (PipelineData::ListStream(stream, _), OutDest::File(file)) => { - let mut file = file.try_clone()?; - // use BufWriter here? - for value in stream { - let bytes = value_to_bytes(value)?; - file.write_all(&bytes)?; - file.write_all(b"\n")?; - } - file.flush()?; - Ok(PipelineData::Empty) - } - ( - data @ (PipelineData::Value(_, _) | PipelineData::ListStream(_, _)), - OutDest::Inherit, - ) => { - let config = engine_state.get_config(); - - if let Some(decl_id) = engine_state.table_decl_id { - let command = engine_state.get_decl(decl_id); - if command.get_block_id().is_some() { - data.write_all_and_flush(engine_state, config, false, false)?; - } else { - let call = Call::new(Span::unknown()); - let stack = &mut stack.start_capture(); - let table = command.run(engine_state, stack, &call, data)?; - table.write_all_and_flush(engine_state, config, false, false)?; - } - } else { - data.write_all_and_flush(engine_state, config, false, false)?; - }; - Ok(PipelineData::Empty) - } - } - } - - pub fn drain(self) -> Result<(), ShellError> { - match self { - PipelineData::Value(Value::Error { error, .. }, _) => Err(*error), - PipelineData::Value(_, _) => Ok(()), - PipelineData::ListStream(stream, _) => stream.drain(), - PipelineData::ExternalStream { stdout, stderr, .. } => { - if let Some(stdout) = stdout { - stdout.drain()?; - } - - if let Some(stderr) = stderr { - stderr.drain()?; - } - - Ok(()) - } - PipelineData::Empty => Ok(()), - } - } - - pub fn drain_with_exit_code(self) -> Result { - match self { - PipelineData::Value(Value::Error { error, .. }, _) => Err(*error), - PipelineData::Value(_, _) => Ok(0), - PipelineData::ListStream(stream, _) => { - stream.drain()?; - Ok(0) - } - PipelineData::ExternalStream { - stdout, - stderr, - exit_code, - .. - } => { - if let Some(stdout) = stdout { - stdout.drain()?; - } - - if let Some(stderr) = stderr { - stderr.drain()?; - } - - if let Some(exit_code) = exit_code { - let result = drain_exit_code(exit_code)?; - Ok(result) - } else { - Ok(0) - } - } - PipelineData::Empty => Ok(0), - } - } - - /// Try convert from self into iterator - /// - /// It returns Err if the `self` cannot be converted to an iterator. - pub fn into_iter_strict(self, span: Span) -> Result { - Ok(PipelineIterator(match self { - PipelineData::Value(value, ..) => { - let val_span = value.span(); - match value { - Value::List { vals, .. 
} => PipelineIteratorInner::ListStream( - ListStream::new(vals.into_iter(), val_span, None).into_iter(), - ), - Value::Binary { val, .. } => PipelineIteratorInner::ListStream( - ListStream::new( - val.into_iter().map(move |x| Value::int(x as i64, val_span)), - val_span, - None, - ) - .into_iter(), - ), - Value::Range { ref val, .. } => PipelineIteratorInner::ListStream( - ListStream::new(val.into_range_iter(value.span(), None), val_span, None) - .into_iter(), - ), - // Propagate errors by explicitly matching them before the final case. - Value::Error { error, .. } => return Err(*error), - other => { - return Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary, raw data or range".into(), - wrong_type: other.get_type().to_string(), - dst_span: span, - src_span: val_span, - }) - } - } - } - PipelineData::ListStream(stream, ..) => { - PipelineIteratorInner::ListStream(stream.into_iter()) - } - PipelineData::Empty => { - return Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary, raw data or range".into(), - wrong_type: "null".into(), - dst_span: span, - src_span: span, - }) - } - PipelineData::ExternalStream { - stdout: Some(stdout), - .. - } => PipelineIteratorInner::ExternalStream(stdout), - PipelineData::ExternalStream { stdout: None, .. } => PipelineIteratorInner::Empty, - })) - } - - pub fn collect_string(self, separator: &str, config: &Config) -> Result { - match self { - PipelineData::Empty => Ok(String::new()), - PipelineData::Value(v, ..) => Ok(v.to_expanded_string(separator, config)), - PipelineData::ListStream(s, ..) => Ok(s.into_string(separator, config)), - PipelineData::ExternalStream { stdout: None, .. } => Ok(String::new()), - PipelineData::ExternalStream { - stdout: Some(s), - trim_end_newline, - .. - } => { - let mut output = String::new(); - - for val in s { - output.push_str(&val?.coerce_into_string()?); - } - if trim_end_newline { - output.truncate(output.trim_end_matches(LINE_ENDING_PATTERN).len()); - } - Ok(output) - } - } - } - - /// Retrieves string from pipeline data. - /// - /// As opposed to `collect_string` this raises error rather than converting non-string values. - /// The `span` will be used if `ListStream` is encountered since it doesn't carry a span. - pub fn collect_string_strict( - self, - span: Span, - ) -> Result<(String, Span, Option), ShellError> { - match self { - PipelineData::Empty => Ok((String::new(), span, None)), - PipelineData::Value(Value::String { val, .. }, metadata) => Ok((val, span, metadata)), - PipelineData::Value(val, _) => Err(ShellError::TypeMismatch { - err_message: "string".into(), - span: val.span(), - }), - PipelineData::ListStream(_, _) => Err(ShellError::TypeMismatch { - err_message: "string".into(), - span, - }), - PipelineData::ExternalStream { - stdout: None, - metadata, - span, - .. - } => Ok((String::new(), span, metadata)), - PipelineData::ExternalStream { - stdout: Some(stdout), - metadata, - span, - .. - } => Ok((stdout.into_string()?.item, span, metadata)), - } - } - - pub fn follow_cell_path( - self, - cell_path: &[PathMember], - head: Span, - insensitive: bool, - ) -> Result { - match self { - // FIXME: there are probably better ways of doing this - PipelineData::ListStream(stream, ..) => Value::list(stream.into_iter().collect(), head) - .follow_cell_path(cell_path, insensitive), - PipelineData::Value(v, ..) 
=> v.follow_cell_path(cell_path, insensitive), - PipelineData::Empty => Err(ShellError::IncompatiblePathAccess { - type_name: "empty pipeline".to_string(), - span: head, - }), - PipelineData::ExternalStream { span, .. } => Err(ShellError::IncompatiblePathAccess { - type_name: "external stream".to_string(), - span, - }), - } - } - - /// Simplified mapper to help with simple values also. For full iterator support use `.into_iter()` instead - pub fn map( - self, - mut f: F, - ctrlc: Option>, - ) -> Result - where - Self: Sized, - F: FnMut(Value) -> Value + 'static + Send, - { - match self { - PipelineData::Value(value, ..) => { - let span = value.span(); - match value { - Value::List { vals, .. } => { - Ok(vals.into_iter().map(f).into_pipeline_data(span, ctrlc)) - } - Value::Range { val, .. } => Ok(val - .into_range_iter(span, ctrlc.clone()) - .map(f) - .into_pipeline_data(span, ctrlc)), - value => match f(value) { - Value::Error { error, .. } => Err(*error), - v => Ok(v.into_pipeline_data()), - }, - } - } - PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::ListStream(stream, ..) => { - Ok(PipelineData::ListStream(stream.map(f), None)) - } - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::empty()), - PipelineData::ExternalStream { - stdout: Some(stream), - trim_end_newline, - .. - } => { - let collected = stream.into_bytes()?; - - if let Ok(mut st) = String::from_utf8(collected.clone().item) { - if trim_end_newline { - st.truncate(st.trim_end_matches(LINE_ENDING_PATTERN).len()); - } - Ok(f(Value::string(st, collected.span)).into_pipeline_data()) - } else { - Ok(f(Value::binary(collected.item, collected.span)).into_pipeline_data()) - } - } - } - } - - /// Simplified flatmapper. For full iterator support use `.into_iter()` instead - pub fn flat_map( - self, - mut f: F, - ctrlc: Option>, - ) -> Result - where - Self: Sized, - U: IntoIterator + 'static, - ::IntoIter: 'static + Send, - F: FnMut(Value) -> U + 'static + Send, - { - match self { - PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::Value(value, ..) => { - let span = value.span(); - match value { - Value::List { vals, .. } => { - Ok(vals.into_iter().flat_map(f).into_pipeline_data(span, ctrlc)) - } - Value::Range { val, .. } => Ok(val - .into_range_iter(span, ctrlc.clone()) - .flat_map(f) - .into_pipeline_data(span, ctrlc)), - value => Ok(f(value).into_iter().into_pipeline_data(span, ctrlc)), - } - } - PipelineData::ListStream(stream, ..) => { - Ok(stream.modify(|iter| iter.flat_map(f)).into()) - } - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty), - PipelineData::ExternalStream { - stdout: Some(stream), - span, - trim_end_newline, - .. - } => { - let collected = stream.into_bytes()?; - - if let Ok(mut st) = String::from_utf8(collected.clone().item) { - if trim_end_newline { - st.truncate(st.trim_end_matches(LINE_ENDING_PATTERN).len()) - } - Ok(f(Value::string(st, collected.span)) - .into_iter() - .into_pipeline_data(span, ctrlc)) - } else { - Ok(f(Value::binary(collected.item, collected.span)) - .into_iter() - .into_pipeline_data(span, ctrlc)) - } - } - } - } - - pub fn filter( - self, - mut f: F, - ctrlc: Option>, - ) -> Result - where - Self: Sized, - F: FnMut(&Value) -> bool + 'static + Send, - { - match self { - PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::Value(value, ..) => { - let span = value.span(); - match value { - Value::List { vals, .. 
} => { - Ok(vals.into_iter().filter(f).into_pipeline_data(span, ctrlc)) - } - Value::Range { val, .. } => Ok(val - .into_range_iter(span, ctrlc.clone()) - .filter(f) - .into_pipeline_data(span, ctrlc)), - value => { - if f(&value) { - Ok(value.into_pipeline_data()) - } else { - Ok(Value::nothing(span).into_pipeline_data()) - } - } - } - } - PipelineData::ListStream(stream, ..) => Ok(stream.modify(|iter| iter.filter(f)).into()), - PipelineData::ExternalStream { stdout: None, .. } => Ok(PipelineData::Empty), - PipelineData::ExternalStream { - stdout: Some(stream), - trim_end_newline, - .. - } => { - let collected = stream.into_bytes()?; - - if let Ok(mut st) = String::from_utf8(collected.clone().item) { - if trim_end_newline { - st.truncate(st.trim_end_matches(LINE_ENDING_PATTERN).len()) - } - let v = Value::string(st, collected.span); - - if f(&v) { - Ok(v.into_pipeline_data()) - } else { - Ok(PipelineData::new_with_metadata(None, collected.span)) - } - } else { - let v = Value::binary(collected.item, collected.span); - - if f(&v) { - Ok(v.into_pipeline_data()) - } else { - Ok(PipelineData::new_with_metadata(None, collected.span)) - } - } - } - } - } - - /// Try to catch the external stream exit status and detect if it failed. - /// - /// This is useful for external commands with semicolon, we can detect errors early to avoid - /// commands after the semicolon running. - /// - /// Returns `self` and a flag that indicates if the external stream run failed. If `self` is - /// not [`PipelineData::ExternalStream`], the flag will be `false`. - /// - /// Currently this will consume an external stream to completion. - pub fn check_external_failed(self) -> (Self, bool) { - let mut failed_to_run = false; - // Only need ExternalStream without redirecting output. - // It indicates we have no more commands to execute currently. - if let PipelineData::ExternalStream { - stdout: None, - stderr, - mut exit_code, - span, - metadata, - trim_end_newline, - } = self - { - let exit_code = exit_code.take(); - - // Note: - // In run-external's implementation detail, the result sender thread - // send out stderr message first, then stdout message, then exit_code. - // - // In this clause, we already make sure that `stdout` is None - // But not the case of `stderr`, so if `stderr` is not None - // We need to consume stderr message before reading external commands' exit code. - // - // Or we'll never have a chance to read exit_code if stderr producer produce too much stderr message. - // So we consume stderr stream and rebuild it. - let stderr = stderr.map(|stderr_stream| { - let stderr_ctrlc = stderr_stream.ctrlc.clone(); - let stderr_span = stderr_stream.span; - let stderr_bytes = stderr_stream - .into_bytes() - .map(|bytes| bytes.item) - .unwrap_or_default(); - RawStream::new( - Box::new(std::iter::once(Ok(stderr_bytes))), - stderr_ctrlc, - stderr_span, - None, - ) - }); - - match exit_code { - Some(exit_code_stream) => { - let exit_code: Vec = exit_code_stream.into_iter().collect(); - if let Some(Value::Int { val: code, .. }) = exit_code.last() { - // if exit_code is not 0, it indicates error occurred, return back Err. 
- if *code != 0 { - failed_to_run = true; - } - } - ( - PipelineData::ExternalStream { - stdout: None, - stderr, - exit_code: Some(ListStream::new(exit_code.into_iter(), span, None)), - span, - metadata, - trim_end_newline, - }, - failed_to_run, - ) - } - None => ( - PipelineData::ExternalStream { - stdout: None, - stderr, - exit_code: None, - span, - metadata, - trim_end_newline, - }, - failed_to_run, - ), - } - } else { - (self, false) - } - } - /// Try to convert Value from Value::Range to Value::List. - /// This is useful to expand Value::Range into array notation, specifically when - /// converting `to json` or `to nuon`. - /// `1..3 | to XX -> [1,2,3]` - pub fn try_expand_range(self) -> Result { - match self { - PipelineData::Value(v, metadata) => { - let span = v.span(); - match v { - Value::Range { val, .. } => { - match *val { - Range::IntRange(range) => { - if range.is_unbounded() { - return Err(ShellError::GenericError { - error: "Cannot create range".into(), - msg: "Unbounded ranges are not allowed when converting to this format".into(), - span: Some(span), - help: Some("Consider using ranges with valid start and end point.".into()), - inner: vec![], - }); - } - } - Range::FloatRange(range) => { - if range.is_unbounded() { - return Err(ShellError::GenericError { - error: "Cannot create range".into(), - msg: "Unbounded ranges are not allowed when converting to this format".into(), - span: Some(span), - help: Some("Consider using ranges with valid start and end point.".into()), - inner: vec![], - }); - } - } - } - let range_values: Vec = val.into_range_iter(span, None).collect(); - Ok(PipelineData::Value(Value::list(range_values, span), None)) - } - x => Ok(PipelineData::Value(x, metadata)), - } - } - _ => Ok(self), - } - } - - /// Consume and print self data immediately. - /// - /// `no_newline` controls if we need to attach newline character to output. - /// `to_stderr` controls if data is output to stderr, when the value is false, the data is output to stdout. - pub fn print( - self, - engine_state: &EngineState, - stack: &mut Stack, - no_newline: bool, - to_stderr: bool, - ) -> Result { - // If the table function is in the declarations, then we can use it - // to create the table value that will be printed in the terminal - - let config = engine_state.get_config(); - - if let PipelineData::ExternalStream { - stdout: stream, - stderr: stderr_stream, - exit_code, - .. - } = self - { - return print_if_stream(stream, stderr_stream, to_stderr, exit_code); - } - - if let Some(decl_id) = engine_state.table_decl_id { - let command = engine_state.get_decl(decl_id); - if command.get_block_id().is_some() { - return self.write_all_and_flush(engine_state, config, no_newline, to_stderr); - } - - let call = Call::new(Span::new(0, 0)); - let table = command.run(engine_state, stack, &call, self)?; - table.write_all_and_flush(engine_state, config, no_newline, to_stderr)?; - } else { - self.write_all_and_flush(engine_state, config, no_newline, to_stderr)?; - }; - - Ok(0) - } - - fn write_all_and_flush( - self, - engine_state: &EngineState, - config: &Config, - no_newline: bool, - to_stderr: bool, - ) -> Result { - for item in self { - let mut is_err = false; - let mut out = if let Value::Error { error, .. } = item { - let working_set = StateWorkingSet::new(engine_state); - // Value::Errors must always go to stderr, not stdout. 
- is_err = true; - format_error(&working_set, &*error) - } else if no_newline { - item.to_expanded_string("", config) - } else { - item.to_expanded_string("\n", config) - }; - - if !no_newline { - out.push('\n'); - } - - if !to_stderr && !is_err { - stdout_write_all_and_flush(out)? - } else { - stderr_write_all_and_flush(out)? - } - } - - Ok(0) - } -} - -enum PipelineIteratorInner { - Empty, - Value(Value), - ListStream(list_stream::IntoIter), - ExternalStream(RawStream), -} - -pub struct PipelineIterator(PipelineIteratorInner); - -impl IntoIterator for PipelineData { - type Item = Value; - - type IntoIter = PipelineIterator; - - fn into_iter(self) -> Self::IntoIter { - PipelineIterator(match self { - PipelineData::Value(value, ..) => { - let span = value.span(); - match value { - Value::List { vals, .. } => PipelineIteratorInner::ListStream( - ListStream::new(vals.into_iter(), span, None).into_iter(), - ), - Value::Range { val, .. } => PipelineIteratorInner::ListStream( - ListStream::new(val.into_range_iter(span, None), span, None).into_iter(), - ), - x => PipelineIteratorInner::Value(x), - } - } - PipelineData::ListStream(stream, ..) => { - PipelineIteratorInner::ListStream(stream.into_iter()) - } - PipelineData::ExternalStream { - stdout: Some(stdout), - .. - } => PipelineIteratorInner::ExternalStream(stdout), - PipelineData::ExternalStream { stdout: None, .. } => PipelineIteratorInner::Empty, - PipelineData::Empty => PipelineIteratorInner::Empty, - }) - } -} - -pub fn print_if_stream( - stream: Option, - stderr_stream: Option, - to_stderr: bool, - exit_code: Option, -) -> Result { - if let Some(stderr_stream) = stderr_stream { - thread::Builder::new() - .name("stderr consumer".to_string()) - .spawn(move || { - let RawStream { - stream, - leftover, - ctrlc, - .. - } = stderr_stream; - let mut stderr = std::io::stderr(); - let _ = stderr.write_all(&leftover); - drop(leftover); - for bytes in stream { - if nu_utils::ctrl_c::was_pressed(&ctrlc) { - break; - } - match bytes { - Ok(bytes) => { - let _ = stderr.write_all(&bytes); - } - Err(err) => { - // we don't have access to EngineState, but maybe logging the debug - // impl is better than nothing - eprintln!("Error in stderr stream: {err:?}"); - break; - } - } - } - })?; - } - - if let Some(stream) = stream { - for s in stream { - let s_live = s?; - let bin_output = s_live.coerce_into_binary()?; - - if !to_stderr { - stdout_write_all_and_flush(&bin_output)? - } else { - stderr_write_all_and_flush(&bin_output)? - } - } - } - - // Make sure everything has finished - if let Some(exit_code) = exit_code { - return drain_exit_code(exit_code); - } - - Ok(0) -} - -fn drain_exit_code(exit_code: ListStream) -> Result { - let mut exit_codes: Vec<_> = exit_code.into_iter().collect(); - match exit_codes.pop() { - #[cfg(unix)] - Some(Value::Error { error, .. }) => Err(*error), - Some(Value::Int { val, .. }) => Ok(val), - _ => Ok(0), - } -} - -/// Only call this if `output_stream` is not `OutDest::Pipe` or `OutDest::Capture`. -fn consume_child_output(child_output: RawStream, output_stream: &OutDest) -> io::Result<()> { - let mut output = ReadRawStream::new(child_output); - match output_stream { - OutDest::Pipe | OutDest::Capture => { - // The point of `consume_child_output` is to redirect output *right now*, - // but OutDest::Pipe means to redirect output - // into an OS pipe for *future use* (as input for another command). - // So, this branch makes no sense, and will simply drop `output` instead of draining it. 
- // This could trigger a `SIGPIPE` for the external command, - // since there will be no reader for its pipe. - debug_assert!(false) - } - OutDest::Null => { - io::copy(&mut output, &mut io::sink())?; - } - OutDest::Inherit => { - io::copy(&mut output, &mut io::stdout())?; - } - OutDest::File(file) => { - io::copy(&mut output, &mut file.try_clone()?)?; - } - } - Ok(()) -} - -impl Iterator for PipelineIterator { - type Item = Value; - - fn next(&mut self) -> Option { - match &mut self.0 { - PipelineIteratorInner::Empty => None, - PipelineIteratorInner::Value(Value::Nothing { .. }, ..) => None, - PipelineIteratorInner::Value(v, ..) => Some(std::mem::take(v)), - PipelineIteratorInner::ListStream(stream, ..) => stream.next(), - PipelineIteratorInner::ExternalStream(stream) => stream.next().map(|x| match x { - Ok(x) => x, - Err(err) => Value::error( - err, - Span::unknown(), //FIXME: unclear where this span should come from - ), - }), - } - } -} - -pub trait IntoPipelineData { - fn into_pipeline_data(self) -> PipelineData; - - fn into_pipeline_data_with_metadata( - self, - metadata: impl Into>, - ) -> PipelineData; -} - -impl IntoPipelineData for V -where - V: Into, -{ - fn into_pipeline_data(self) -> PipelineData { - PipelineData::Value(self.into(), None) - } - - fn into_pipeline_data_with_metadata( - self, - metadata: impl Into>, - ) -> PipelineData { - PipelineData::Value(self.into(), metadata.into()) - } -} - -pub trait IntoInterruptiblePipelineData { - fn into_pipeline_data(self, span: Span, ctrlc: Option>) -> PipelineData; - fn into_pipeline_data_with_metadata( - self, - span: Span, - ctrlc: Option>, - metadata: impl Into>, - ) -> PipelineData; -} - -impl IntoInterruptiblePipelineData for I -where - I: IntoIterator + Send + 'static, - I::IntoIter: Send + 'static, - ::Item: Into, -{ - fn into_pipeline_data(self, span: Span, ctrlc: Option>) -> PipelineData { - ListStream::new(self.into_iter().map(Into::into), span, ctrlc).into() - } - - fn into_pipeline_data_with_metadata( - self, - span: Span, - ctrlc: Option>, - metadata: impl Into>, - ) -> PipelineData { - PipelineData::ListStream( - ListStream::new(self.into_iter().map(Into::into), span, ctrlc), - metadata.into(), - ) - } -} - -fn value_to_bytes(value: Value) -> Result, ShellError> { - let bytes = match value { - Value::String { val, .. } => val.into_bytes(), - Value::Binary { val, .. } => val, - Value::List { vals, .. } => { - let val = vals - .into_iter() - .map(Value::coerce_into_string) - .collect::, ShellError>>()? - .join("\n") - + "\n"; - - val.into_bytes() - } - // Propagate errors by explicitly matching them before the final case. - Value::Error { error, .. 
} => return Err(*error), - value => value.coerce_into_string()?.into_bytes(), - }; - Ok(bytes) -} - -struct ReadRawStream { - iter: Box, ShellError>>>, - cursor: Option>>, -} - -impl ReadRawStream { - fn new(stream: RawStream) -> Self { - debug_assert!(stream.leftover.is_empty()); - Self { - iter: stream.stream, - cursor: Some(Cursor::new(Vec::new())), - } - } -} - -impl Read for ReadRawStream { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - while let Some(cursor) = self.cursor.as_mut() { - let read = cursor.read(buf)?; - if read > 0 { - return Ok(read); - } else { - match self.iter.next().transpose() { - Ok(next) => { - self.cursor = next.map(Cursor::new); - } - Err(err) => { - // temporary hack - return Err(io::Error::new(io::ErrorKind::Other, err)); - } - } - } - } - Ok(0) - } -} diff --git a/crates/nu-protocol/src/pipeline_data/raw_stream.rs b/crates/nu-protocol/src/pipeline_data/raw_stream.rs deleted file mode 100644 index 846cdd772b..0000000000 --- a/crates/nu-protocol/src/pipeline_data/raw_stream.rs +++ /dev/null @@ -1,176 +0,0 @@ -use crate::*; -use std::{ - fmt::Debug, - sync::{atomic::AtomicBool, Arc}, -}; - -pub struct RawStream { - pub stream: Box, ShellError>> + Send + 'static>, - pub leftover: Vec, - pub ctrlc: Option>, - pub is_binary: bool, - pub span: Span, - pub known_size: Option, // (bytes) -} - -impl RawStream { - pub fn new( - stream: Box, ShellError>> + Send + 'static>, - ctrlc: Option>, - span: Span, - known_size: Option, - ) -> Self { - Self { - stream, - leftover: vec![], - ctrlc, - is_binary: false, - span, - known_size, - } - } - - pub fn into_bytes(self) -> Result>, ShellError> { - let mut output = vec![]; - - for item in self.stream { - if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { - break; - } - output.extend(item?); - } - - Ok(Spanned { - item: output, - span: self.span, - }) - } - - pub fn into_string(self) -> Result, ShellError> { - let mut output = String::new(); - let span = self.span; - let ctrlc = &self.ctrlc.clone(); - - for item in self { - if nu_utils::ctrl_c::was_pressed(ctrlc) { - break; - } - output.push_str(&item?.coerce_into_string()?); - } - - Ok(Spanned { item: output, span }) - } - - pub fn chain(self, stream: RawStream) -> RawStream { - RawStream { - stream: Box::new(self.stream.chain(stream.stream)), - leftover: self.leftover.into_iter().chain(stream.leftover).collect(), - ctrlc: self.ctrlc, - is_binary: self.is_binary, - span: self.span, - known_size: self.known_size, - } - } - - pub fn drain(self) -> Result<(), ShellError> { - for next in self { - match next { - Ok(val) => { - if let Value::Error { error, .. } = val { - return Err(*error); - } - } - Err(err) => return Err(err), - } - } - Ok(()) - } -} -impl Debug for RawStream { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RawStream").finish() - } -} -impl Iterator for RawStream { - type Item = Result; - - fn next(&mut self) -> Option { - if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { - return None; - } - - // If we know we're already binary, just output that - if self.is_binary { - self.stream.next().map(|buffer| { - buffer.map(|mut v| { - if !self.leftover.is_empty() { - for b in self.leftover.drain(..).rev() { - v.insert(0, b); - } - } - Value::binary(v, self.span) - }) - }) - } else { - // We *may* be text. We're only going to try utf-8. Other decodings - // needs to be taken as binary first, then passed through `decode`. 
- if let Some(buffer) = self.stream.next() { - match buffer { - Ok(mut v) => { - if !self.leftover.is_empty() { - while let Some(b) = self.leftover.pop() { - v.insert(0, b); - } - } - - match String::from_utf8(v.clone()) { - Ok(s) => { - // Great, we have a complete string, let's output it - Some(Ok(Value::string(s, self.span))) - } - Err(err) => { - // Okay, we *might* have a string but we've also got some errors - if v.is_empty() { - // We can just end here - None - } else if v.len() > 3 - && (v.len() - err.utf8_error().valid_up_to() > 3) - { - // As UTF-8 characters are max 4 bytes, if we have more than that in error we know - // that it's not just a character spanning two frames. - // We now know we are definitely binary, so switch to binary and stay there. - self.is_binary = true; - Some(Ok(Value::binary(v, self.span))) - } else { - // Okay, we have a tiny bit of error at the end of the buffer. This could very well be - // a character that spans two frames. Since this is the case, remove the error from - // the current frame an dput it in the leftover buffer. - self.leftover = v[err.utf8_error().valid_up_to()..].to_vec(); - - let buf = v[0..err.utf8_error().valid_up_to()].to_vec(); - - match String::from_utf8(buf) { - Ok(s) => Some(Ok(Value::string(s, self.span))), - Err(_) => { - // Something is definitely wrong. Switch to binary, and stay there - self.is_binary = true; - Some(Ok(Value::binary(v, self.span))) - } - } - } - } - } - } - Err(e) => Some(Err(e)), - } - } else if !self.leftover.is_empty() { - let output = Ok(Value::binary(self.leftover.clone(), self.span)); - self.leftover.clear(); - - Some(output) - } else { - None - } - } - } -} diff --git a/crates/nu-protocol/src/process/child.rs b/crates/nu-protocol/src/process/child.rs new file mode 100644 index 0000000000..cc74b40fc1 --- /dev/null +++ b/crates/nu-protocol/src/process/child.rs @@ -0,0 +1,294 @@ +use crate::{ + byte_stream::convert_file, process::ExitStatus, ErrSpan, IntoSpanned, ShellError, Span, +}; +use nu_system::ForegroundChild; +use os_pipe::PipeReader; +use std::{ + fmt::Debug, + io::{self, Read}, + sync::mpsc::{self, Receiver, RecvError, TryRecvError}, + thread, +}; + +#[derive(Debug)] +enum ExitStatusFuture { + Finished(Result>), + Running(Receiver>), +} + +impl ExitStatusFuture { + fn wait(&mut self, span: Span) -> Result { + match self { + ExitStatusFuture::Finished(Ok(status)) => Ok(*status), + ExitStatusFuture::Finished(Err(err)) => Err(err.as_ref().clone()), + ExitStatusFuture::Running(receiver) => { + let code = match receiver.recv() { + Ok(Ok(status)) => Ok(status), + Ok(Err(err)) => Err(ShellError::IOErrorSpanned { + msg: format!("failed to get exit code: {err:?}"), + span, + }), + Err(RecvError) => Err(ShellError::IOErrorSpanned { + msg: "failed to get exit code".into(), + span, + }), + }; + + *self = ExitStatusFuture::Finished(code.clone().map_err(Box::new)); + + code + } + } + } + + fn try_wait(&mut self, span: Span) -> Result, ShellError> { + match self { + ExitStatusFuture::Finished(Ok(code)) => Ok(Some(*code)), + ExitStatusFuture::Finished(Err(err)) => Err(err.as_ref().clone()), + ExitStatusFuture::Running(receiver) => { + let code = match receiver.try_recv() { + Ok(Ok(status)) => Ok(Some(status)), + Ok(Err(err)) => Err(ShellError::IOErrorSpanned { + msg: format!("failed to get exit code: {err:?}"), + span, + }), + Err(TryRecvError::Disconnected) => Err(ShellError::IOErrorSpanned { + msg: "failed to get exit code".into(), + span, + }), + Err(TryRecvError::Empty) => Ok(None), + }; + + if let 
Some(code) = code.clone().transpose() { + *self = ExitStatusFuture::Finished(code.map_err(Box::new)); + } + + code + } + } + } +} + +pub enum ChildPipe { + Pipe(PipeReader), + Tee(Box), +} + +impl Debug for ChildPipe { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ChildPipe").finish() + } +} + +impl From for ChildPipe { + fn from(pipe: PipeReader) -> Self { + Self::Pipe(pipe) + } +} + +impl Read for ChildPipe { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + ChildPipe::Pipe(pipe) => pipe.read(buf), + ChildPipe::Tee(tee) => tee.read(buf), + } + } +} + +#[derive(Debug)] +pub struct ChildProcess { + pub stdout: Option, + pub stderr: Option, + exit_status: ExitStatusFuture, + span: Span, +} + +impl ChildProcess { + pub fn new( + mut child: ForegroundChild, + reader: Option, + swap: bool, + span: Span, + ) -> Result { + let (stdout, stderr) = if let Some(combined) = reader { + (Some(combined), None) + } else { + let stdout = child.as_mut().stdout.take().map(convert_file); + let stderr = child.as_mut().stderr.take().map(convert_file); + + if swap { + (stderr, stdout) + } else { + (stdout, stderr) + } + }; + + // Create a thread to wait for the exit status. + let (exit_status_sender, exit_status) = mpsc::channel(); + + thread::Builder::new() + .name("exit status waiter".into()) + .spawn(move || exit_status_sender.send(child.wait().map(Into::into))) + .err_span(span)?; + + Ok(Self::from_raw(stdout, stderr, Some(exit_status), span)) + } + + pub fn from_raw( + stdout: Option, + stderr: Option, + exit_status: Option>>, + span: Span, + ) -> Self { + Self { + stdout: stdout.map(Into::into), + stderr: stderr.map(Into::into), + exit_status: exit_status + .map(ExitStatusFuture::Running) + .unwrap_or(ExitStatusFuture::Finished(Ok(ExitStatus::Exited(0)))), + span, + } + } + + pub fn set_exit_code(&mut self, exit_code: i32) { + self.exit_status = ExitStatusFuture::Finished(Ok(ExitStatus::Exited(exit_code))); + } + + pub fn span(&self) -> Span { + self.span + } + + pub fn into_bytes(mut self) -> Result, ShellError> { + if self.stderr.is_some() { + debug_assert!(false, "stderr should not exist"); + return Err(ShellError::IOErrorSpanned { + msg: "internal error".into(), + span: self.span, + }); + } + + let bytes = if let Some(stdout) = self.stdout { + collect_bytes(stdout).err_span(self.span)? + } else { + Vec::new() + }; + + // TODO: check exit_status + self.exit_status.wait(self.span)?; + + Ok(bytes) + } + + pub fn wait(mut self) -> Result { + if let Some(stdout) = self.stdout.take() { + let stderr = self + .stderr + .take() + .map(|stderr| { + thread::Builder::new() + .name("stderr consumer".into()) + .spawn(move || consume_pipe(stderr)) + }) + .transpose() + .err_span(self.span)?; + + let res = consume_pipe(stdout); + + if let Some(handle) = stderr { + handle + .join() + .map_err(|e| match e.downcast::() { + Ok(io) => ShellError::from((*io).into_spanned(self.span)), + Err(err) => ShellError::GenericError { + error: "Unknown error".into(), + msg: format!("{err:?}"), + span: Some(self.span), + help: None, + inner: Vec::new(), + }, + })? 
+ .err_span(self.span)?; + } + + res.err_span(self.span)?; + } else if let Some(stderr) = self.stderr.take() { + consume_pipe(stderr).err_span(self.span)?; + } + + self.exit_status.wait(self.span) + } + + pub fn try_wait(&mut self) -> Result, ShellError> { + self.exit_status.try_wait(self.span) + } + + pub fn wait_with_output(mut self) -> Result { + let (stdout, stderr) = if let Some(stdout) = self.stdout { + let stderr = self + .stderr + .map(|stderr| thread::Builder::new().spawn(move || collect_bytes(stderr))) + .transpose() + .err_span(self.span)?; + + let stdout = collect_bytes(stdout).err_span(self.span)?; + + let stderr = stderr + .map(|handle| { + handle.join().map_err(|e| match e.downcast::() { + Ok(io) => ShellError::from((*io).into_spanned(self.span)), + Err(err) => ShellError::GenericError { + error: "Unknown error".into(), + msg: format!("{err:?}"), + span: Some(self.span), + help: None, + inner: Vec::new(), + }, + }) + }) + .transpose()? + .transpose() + .err_span(self.span)?; + + (Some(stdout), stderr) + } else { + let stderr = self + .stderr + .map(collect_bytes) + .transpose() + .err_span(self.span)?; + + (None, stderr) + }; + + let exit_status = self.exit_status.wait(self.span)?; + + Ok(ProcessOutput { + stdout, + stderr, + exit_status, + }) + } +} + +fn collect_bytes(pipe: ChildPipe) -> io::Result> { + let mut buf = Vec::new(); + match pipe { + ChildPipe::Pipe(mut pipe) => pipe.read_to_end(&mut buf), + ChildPipe::Tee(mut tee) => tee.read_to_end(&mut buf), + }?; + Ok(buf) +} + +fn consume_pipe(pipe: ChildPipe) -> io::Result<()> { + match pipe { + ChildPipe::Pipe(mut pipe) => io::copy(&mut pipe, &mut io::sink()), + ChildPipe::Tee(mut tee) => io::copy(&mut tee, &mut io::sink()), + }?; + Ok(()) +} + +pub struct ProcessOutput { + pub stdout: Option>, + pub stderr: Option>, + pub exit_status: ExitStatus, +} diff --git a/crates/nu-protocol/src/process/exit_status.rs b/crates/nu-protocol/src/process/exit_status.rs new file mode 100644 index 0000000000..8f3794c44f --- /dev/null +++ b/crates/nu-protocol/src/process/exit_status.rs @@ -0,0 +1,64 @@ +use std::process; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ExitStatus { + Exited(i32), + #[cfg(unix)] + Signaled { + signal: i32, + core_dumped: bool, + }, +} + +impl ExitStatus { + pub fn code(self) -> i32 { + match self { + ExitStatus::Exited(code) => code, + #[cfg(unix)] + ExitStatus::Signaled { signal, .. } => -signal, + } + } +} + +#[cfg(unix)] +impl From for ExitStatus { + fn from(status: process::ExitStatus) -> Self { + use std::os::unix::process::ExitStatusExt; + + match (status.code(), status.signal()) { + (Some(code), None) => Self::Exited(code), + (None, Some(signal)) => Self::Signaled { + signal, + core_dumped: status.core_dumped(), + }, + (None, None) => { + debug_assert!(false, "ExitStatus should have either a code or a signal"); + Self::Exited(-1) + } + (Some(code), Some(signal)) => { + // Should be unreachable, as `code()` will be `None` if `signal()` is `Some` + // according to the docs for `ExitStatus::code`. 
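(Editorial aside, not part of the patch: a small unix-only sketch, using only `std`, of the two cases the conversion above distinguishes — a normal exit code versus termination by a signal, which the new enum reports as a negative code.)

```rust
#[cfg(unix)]
fn main() -> std::io::Result<()> {
    use std::os::unix::process::ExitStatusExt;
    use std::process::Command;

    // Normal exit: corresponds to ExitStatus::Exited(3), i.e. code() == 3.
    let exited = Command::new("sh").arg("-c").arg("exit 3").status()?;
    assert_eq!(exited.code(), Some(3));
    assert_eq!(exited.signal(), None);

    // Killed by a signal: std reports no code and Some(9) for the signal,
    // which the enum above maps to Signaled { signal: 9, .. } and code() == -9.
    let mut child = Command::new("sleep").arg("30").spawn()?;
    child.kill()?; // sends SIGKILL on unix
    let signaled = child.wait()?;
    assert_eq!(signaled.code(), None);
    assert_eq!(signaled.signal(), Some(9));
    Ok(())
}

#[cfg(not(unix))]
fn main() {}
```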
+ debug_assert!( + false, + "ExitStatus cannot have both a code ({code}) and a signal ({signal})" + ); + Self::Signaled { + signal, + core_dumped: status.core_dumped(), + } + } + } + } +} + +#[cfg(not(unix))] +impl From for ExitStatus { + fn from(status: process::ExitStatus) -> Self { + let code = status.code(); + debug_assert!( + code.is_some(), + "`ExitStatus::code` cannot return `None` on windows" + ); + Self::Exited(code.unwrap_or(-1)) + } +} diff --git a/crates/nu-protocol/src/process/mod.rs b/crates/nu-protocol/src/process/mod.rs new file mode 100644 index 0000000000..2fcf65f56e --- /dev/null +++ b/crates/nu-protocol/src/process/mod.rs @@ -0,0 +1,5 @@ +mod child; +mod exit_status; + +pub use child::*; +pub use exit_status::ExitStatus; diff --git a/crates/nu-protocol/src/util.rs b/crates/nu-protocol/src/util.rs deleted file mode 100644 index 1c17c49e4c..0000000000 --- a/crates/nu-protocol/src/util.rs +++ /dev/null @@ -1,52 +0,0 @@ -use crate::ShellError; -use std::io::{BufRead, BufReader, Read}; - -pub struct BufferedReader { - input: BufReader, - error: bool, -} - -impl BufferedReader { - pub fn new(input: BufReader) -> Self { - Self { - input, - error: false, - } - } - - pub fn into_inner(self) -> BufReader { - self.input - } -} - -impl Iterator for BufferedReader { - type Item = Result, ShellError>; - - fn next(&mut self) -> Option { - // Don't try to read more data if an error occurs - if self.error { - return None; - } - - let buffer = self.input.fill_buf(); - match buffer { - Ok(s) => { - let result = s.to_vec(); - - let buffer_len = s.len(); - - if buffer_len == 0 { - None - } else { - self.input.consume(buffer_len); - - Some(Ok(result)) - } - } - Err(e) => { - self.error = true; - Some(Err(ShellError::IOError { msg: e.to_string() })) - } - } - } -} diff --git a/crates/nu-protocol/tests/test_pipeline_data.rs b/crates/nu-protocol/tests/test_pipeline_data.rs index 6675f6a04a..95941285ad 100644 --- a/crates/nu-protocol/tests/test_pipeline_data.rs +++ b/crates/nu-protocol/tests/test_pipeline_data.rs @@ -11,5 +11,5 @@ fn test_convert_pipeline_data_to_value() { let new_span = Span::new(5, 6); let converted_value = pipeline_data.into_value(new_span); - assert_eq!(converted_value, Value::int(value_val, new_span)); + assert_eq!(converted_value, Ok(Value::int(value_val, new_span))); } diff --git a/crates/nu-system/src/foreground.rs b/crates/nu-system/src/foreground.rs index d54cab1f19..2fe3c4fb29 100644 --- a/crates/nu-system/src/foreground.rs +++ b/crates/nu-system/src/foreground.rs @@ -1,6 +1,6 @@ use std::{ io, - process::{Child, Command}, + process::{Child, Command, ExitStatus}, sync::{atomic::AtomicU32, Arc}, }; @@ -72,6 +72,10 @@ impl ForegroundChild { }) } } + + pub fn wait(&mut self) -> io::Result { + self.as_mut().wait() + } } impl AsMut for ForegroundChild { diff --git a/crates/nu_plugin_example/src/commands/collect_external.rs b/crates/nu_plugin_example/src/commands/collect_bytes.rs similarity index 56% rename from crates/nu_plugin_example/src/commands/collect_external.rs rename to crates/nu_plugin_example/src/commands/collect_bytes.rs index e5c8c61f2e..51ca1d4222 100644 --- a/crates/nu_plugin_example/src/commands/collect_external.rs +++ b/crates/nu_plugin_example/src/commands/collect_bytes.rs @@ -1,22 +1,22 @@ use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_protocol::{ - Category, Example, LabeledError, PipelineData, RawStream, Signature, Type, Value, + ByteStream, Category, Example, LabeledError, PipelineData, Signature, Type, Value, }; use 
crate::ExamplePlugin; -/// `> | example collect-external` -pub struct CollectExternal; +/// `> | example collect-bytes` +pub struct CollectBytes; -impl PluginCommand for CollectExternal { +impl PluginCommand for CollectBytes { type Plugin = ExamplePlugin; fn name(&self) -> &str { - "example collect-external" + "example collect-bytes" } fn usage(&self) -> &str { - "Example transformer to raw external stream" + "Example transformer to byte stream" } fn search_terms(&self) -> Vec<&str> { @@ -34,7 +34,7 @@ impl PluginCommand for CollectExternal { fn examples(&self) -> Vec { vec![Example { - example: "[a b] | example collect-external", + example: "[a b] | example collect-bytes", description: "collect strings into one stream", result: Some(Value::test_string("ab")), }] @@ -47,26 +47,19 @@ impl PluginCommand for CollectExternal { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let stream = input.into_iter().map(|value| { - value - .as_str() - .map(|str| str.as_bytes()) - .or_else(|_| value.as_binary()) - .map(|bin| bin.to_vec()) - }); - Ok(PipelineData::ExternalStream { - stdout: Some(RawStream::new(Box::new(stream), None, call.head, None)), - stderr: None, - exit_code: None, - span: call.head, - metadata: None, - trim_end_newline: false, - }) + Ok(PipelineData::ByteStream( + ByteStream::from_result_iter( + input.into_iter().map(Value::coerce_into_binary), + call.head, + None, + ), + None, + )) } } #[test] fn test_examples() -> Result<(), nu_protocol::ShellError> { use nu_plugin_test_support::PluginTest; - PluginTest::new("example", ExamplePlugin.into())?.test_command_examples(&CollectExternal) + PluginTest::new("example", ExamplePlugin.into())?.test_command_examples(&CollectBytes) } diff --git a/crates/nu_plugin_example/src/commands/mod.rs b/crates/nu_plugin_example/src/commands/mod.rs index 9425dad4ca..dd808616a9 100644 --- a/crates/nu_plugin_example/src/commands/mod.rs +++ b/crates/nu_plugin_example/src/commands/mod.rs @@ -24,14 +24,14 @@ pub use env::Env; pub use view_span::ViewSpan; // Stream demos -mod collect_external; +mod collect_bytes; mod echo; mod for_each; mod generate; mod seq; mod sum; -pub use collect_external::CollectExternal; +pub use collect_bytes::CollectBytes; pub use echo::Echo; pub use for_each::ForEach; pub use generate::Generate; diff --git a/crates/nu_plugin_example/src/lib.rs b/crates/nu_plugin_example/src/lib.rs index e87c31229d..182bc85121 100644 --- a/crates/nu_plugin_example/src/lib.rs +++ b/crates/nu_plugin_example/src/lib.rs @@ -24,7 +24,7 @@ impl Plugin for ExamplePlugin { Box::new(ViewSpan), Box::new(DisableGc), // Stream demos - Box::new(CollectExternal), + Box::new(CollectBytes), Box::new(Echo), Box::new(ForEach), Box::new(Generate), diff --git a/crates/nu_plugin_polars/src/cache/rm.rs b/crates/nu_plugin_polars/src/cache/rm.rs index b8b814ba60..5918209f32 100644 --- a/crates/nu_plugin_polars/src/cache/rm.rs +++ b/crates/nu_plugin_polars/src/cache/rm.rs @@ -94,7 +94,7 @@ mod test { .add_decl(Box::new(First))? .add_decl(Box::new(Get))? .eval("let df = ([[a b];[1 2] [3 4]] | polars into-df); polars store-ls | get key | first | polars store-rm $in")?; - let value = pipeline_data.into_value(Span::test_data()); + let value = pipeline_data.into_value(Span::test_data())?; let msg = value .as_list()? 
.first() diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs index 8dad0d195f..dfb331ac46 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_arrow.rs @@ -124,7 +124,7 @@ pub mod test { assert!(tmp_file.exists()); - let value = pipeline_data.into_value(Span::test_data()); + let value = pipeline_data.into_value(Span::test_data())?; let list = value.as_list()?; assert_eq!(list.len(), 1); let msg = list.first().expect("should have a value").as_str()?; diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs index 7a7197e47a..3a5dc317e7 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_avro.rs @@ -153,7 +153,7 @@ pub mod test { assert!(tmp_file.exists()); - let value = pipeline_data.into_value(Span::test_data()); + let value = pipeline_data.into_value(Span::test_data())?; let list = value.as_list()?; assert_eq!(list.len(), 1); let msg = list.first().expect("should have a value").as_str()?; diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs index ace95d08bb..d55a53f1fc 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_csv.rs @@ -171,7 +171,7 @@ pub mod test { assert!(tmp_file.exists()); - let value = pipeline_data.into_value(Span::test_data()); + let value = pipeline_data.into_value(Span::test_data())?; let list = value.as_list()?; assert_eq!(list.len(), 1); let msg = list.first().expect("should have a value").as_str()?; diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs index 4140ca199b..88b4a61bbf 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_json_lines.rs @@ -125,7 +125,7 @@ pub mod test { assert!(tmp_file.exists()); - let value = pipeline_data.into_value(Span::test_data()); + let value = pipeline_data.into_value(Span::test_data())?; let list = value.as_list()?; assert_eq!(list.len(), 1); let msg = list.first().expect("should have a value").as_str()?; diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs index 9acac7355c..8e3cdffa24 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_nu.rs @@ -89,7 +89,7 @@ impl PluginCommand for ToNu { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { dataframe_command(plugin, call, value) } else { diff --git a/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs b/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs index e53a4ac41d..4a8208ae12 100644 --- a/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs +++ b/crates/nu_plugin_polars/src/dataframe/eager/to_parquet.rs @@ -124,7 +124,7 @@ pub mod test { assert!(tmp_file.exists()); - let value = pipeline_data.into_value(Span::test_data()); + let value = pipeline_data.into_value(Span::test_data())?; let list = value.as_list()?; assert_eq!(list.len(), 1); let msg = list.first().expect("should have a 
value").as_str()?; diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs index 577524123c..feb559aba5 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/expressions_macro.rs @@ -159,7 +159,7 @@ macro_rules! lazy_expr_command { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) .map_err(LabeledError::from)?; @@ -239,7 +239,7 @@ macro_rules! lazy_expr_command { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value) .map_err(LabeledError::from)?; diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs b/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs index ed4b567983..47be15e2f3 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/is_in.rs @@ -114,8 +114,7 @@ impl PluginCommand for ExprIsIn { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => command_df(plugin, engine, call, df), PolarsPluginObject::NuLazyFrame(lazy) => { diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs b/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs index 2bdbfefb35..0e84d8fe96 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/otherwise.rs @@ -99,7 +99,7 @@ impl PluginCommand for ExprOtherwise { let otherwise_predicate: Value = call.req(0)?; let otherwise_predicate = NuExpression::try_from_value(plugin, &otherwise_predicate)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let complete: NuExpression = match NuWhen::try_from_value(plugin, &value)?.when_type { NuWhenType::Then(then) => then.otherwise(otherwise_predicate.into_polars()).into(), NuWhenType::ChainedThen(chained_when) => chained_when diff --git a/crates/nu_plugin_polars/src/dataframe/expressions/when.rs b/crates/nu_plugin_polars/src/dataframe/expressions/when.rs index 158b2ac757..3c1b0eb481 100644 --- a/crates/nu_plugin_polars/src/dataframe/expressions/when.rs +++ b/crates/nu_plugin_polars/src/dataframe/expressions/when.rs @@ -111,7 +111,7 @@ impl PluginCommand for ExprWhen { let then_predicate: Value = call.req(1)?; let then_predicate = NuExpression::try_from_value(plugin, &then_predicate)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let when_then: NuWhen = match value { Value::Nothing { .. 
} => when(when_predicate.into_polars()) .then(then_predicate.into_polars()) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs b/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs index 559ca27658..9348a9ec82 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/cast.rs @@ -90,7 +90,7 @@ impl PluginCommand for CastDF { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuLazyFrame(lazy) => { let (dtype, column_nm) = df_args(call)?; diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs b/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs index db62426e83..b6d8909e31 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/collect.rs @@ -61,7 +61,7 @@ impl PluginCommand for LazyCollect { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuLazyFrame(lazy) => { let eager = lazy.collect(call.head)?; diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs b/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs index b0609d7a3c..787f07fd46 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/explode.rs @@ -50,7 +50,7 @@ impl PluginCommand for LazyExplode { result: Some( NuDataFrame::try_from_columns(vec![ Column::new( - "id".to_string(), + "id".to_string(), vec![ Value::test_int(1), Value::test_int(1), @@ -58,7 +58,7 @@ impl PluginCommand for LazyExplode { Value::test_int(2), ]), Column::new( - "name".to_string(), + "name".to_string(), vec![ Value::test_string("Mercy"), Value::test_string("Mercy"), @@ -66,7 +66,7 @@ impl PluginCommand for LazyExplode { Value::test_string("Bob"), ]), Column::new( - "hobbies".to_string(), + "hobbies".to_string(), vec![ Value::test_string("Cycling"), Value::test_string("Knitting"), @@ -84,7 +84,7 @@ impl PluginCommand for LazyExplode { result: Some( NuDataFrame::try_from_columns(vec![ Column::new( - "hobbies".to_string(), + "hobbies".to_string(), vec![ Value::test_string("Cycling"), Value::test_string("Knitting"), @@ -116,8 +116,7 @@ pub(crate) fn explode( call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? 
{ PolarsPluginObject::NuDataFrame(df) => { let lazy = df.lazy(); diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs index 49d917a393..8fee4cd159 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fetch.rs @@ -67,7 +67,7 @@ impl PluginCommand for LazyFetch { input: PipelineData, ) -> Result { let rows: i64 = call.req(0)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; let eager: NuDataFrame = lazy diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs index 851be588f9..baeb9da01b 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fill_nan.rs @@ -92,7 +92,7 @@ impl PluginCommand for LazyFillNA { input: PipelineData, ) -> Result { let fill: Value = call.req(0)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => { diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs b/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs index 64e6fd0d3f..c5fb67cd8a 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/fill_null.rs @@ -69,7 +69,7 @@ impl PluginCommand for LazyFillNull { input: PipelineData, ) -> Result { let fill: Value = call.req(0)?; - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? 
{ PolarsPluginObject::NuDataFrame(df) => cmd_lazy(plugin, engine, call, df.lazy(), fill), diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs b/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs index 6adabb967a..f8d400ddf4 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/filter.rs @@ -72,7 +72,7 @@ impl PluginCommand for LazyFilter { ) -> Result { let expr_value: Value = call.req(0)?; let filter_expr = NuExpression::try_from_value(plugin, &expr_value)?; - let pipeline_value = input.into_value(call.head); + let pipeline_value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; command(plugin, engine, call, lazy, filter_expr).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs b/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs index cd23a3b370..12ccfbc376 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/filter_with.rs @@ -67,7 +67,7 @@ impl PluginCommand for FilterWith { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/first.rs b/crates/nu_plugin_polars/src/dataframe/lazy/first.rs index 7f32dbf71d..4692a933b0 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/first.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/first.rs @@ -97,7 +97,7 @@ impl PluginCommand for FirstDF { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuLazyFrame::can_downcast(&value) || NuDataFrame::can_downcast(&value) { let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command(plugin, engine, call, lazy).map_err(LabeledError::from) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs b/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs index 2bc7f578c8..7aaccfead9 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/groupby.rs @@ -138,7 +138,7 @@ impl PluginCommand for ToLazyGroupBy { })?; } - let pipeline_value = input.into_value(call.head); + let pipeline_value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; command(plugin, engine, call, lazy, expressions).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs index feea8cf308..6db0269403 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/join.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/join.rs @@ -228,7 +228,7 @@ impl PluginCommand for LazyJoin { let suffix: Option = call.get_flag("suffix")?; let suffix = suffix.unwrap_or_else(|| "_x".into()); - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; let lazy = lazy.to_polars(); diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/last.rs b/crates/nu_plugin_polars/src/dataframe/lazy/last.rs index 44095ac44f..0453c71d1e 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/last.rs +++ 
b/crates/nu_plugin_polars/src/dataframe/lazy/last.rs @@ -72,7 +72,7 @@ impl PluginCommand for LastDF { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; if NuDataFrame::can_downcast(&value) || NuLazyFrame::can_downcast(&value) { let df = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command(plugin, engine, call, df).map_err(|e| e.into()) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs index abd55c77c1..ffd69d14e4 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/median.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/median.rs @@ -89,7 +89,7 @@ impl PluginCommand for LazyMedian { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df.lazy()), PolarsPluginObject::NuLazyFrame(lazy) => command(plugin, engine, call, lazy), diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs b/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs index 46339cc9fc..f6217ff89b 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/quantile.rs @@ -97,7 +97,7 @@ impl PluginCommand for LazyQuantile { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let quantile: f64 = call.req(0)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => { diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/rename.rs b/crates/nu_plugin_polars/src/dataframe/lazy/rename.rs index b678824584..c32b8d9451 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/rename.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/rename.rs @@ -120,7 +120,7 @@ impl PluginCommand for RenameDF { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs index e49aa8e654..75b3f8f804 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/select.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/select.rs @@ -65,7 +65,7 @@ impl PluginCommand for LazySelect { let expr_value = Value::list(vals, call.head); let expressions = NuExpression::extract_exprs(plugin, expr_value)?; - let pipeline_value = input.into_value(call.head); + let pipeline_value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; let lazy = NuLazyFrame::new(lazy.to_polars().select(&expressions)); lazy.to_pipeline_data(plugin, engine, call.head) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs b/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs index 4a975afe97..2beba4424c 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/sort_by_expr.rs @@ -145,7 +145,7 @@ impl PluginCommand for LazySortBy { maintain_order, }; - let pipeline_value = input.into_value(call.head); + let pipeline_value = 
input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &pipeline_value)?; let lazy = NuLazyFrame::new(lazy.to_polars().sort_by_exprs(&expressions, sort_options)); lazy.to_pipeline_data(plugin, engine, call.head) diff --git a/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs b/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs index e8092231d8..d2f953b068 100644 --- a/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs +++ b/crates/nu_plugin_polars/src/dataframe/lazy/with_column.rs @@ -83,7 +83,7 @@ impl PluginCommand for WithColumn { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs index 218cd116b4..b7e506a67c 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/is_not_null.rs @@ -78,8 +78,7 @@ impl PluginCommand for IsNotNull { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df), PolarsPluginObject::NuLazyFrame(lazy) => { diff --git a/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs b/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs index beb3793661..bc04e7fb76 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/masks/is_null.rs @@ -80,8 +80,7 @@ impl PluginCommand for IsNull { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? { PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df), PolarsPluginObject::NuLazyFrame(lazy) => { diff --git a/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs b/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs index 5426ef6d1d..51c6e1bdb3 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/n_unique.rs @@ -70,8 +70,7 @@ impl PluginCommand for NUnique { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); - + let value = input.into_value(call.head)?; match PolarsPluginObject::try_from_value(plugin, &value)? 
{ PolarsPluginObject::NuDataFrame(df) => command(plugin, engine, call, df), PolarsPluginObject::NuLazyFrame(lazy) => { diff --git a/crates/nu_plugin_polars/src/dataframe/series/shift.rs b/crates/nu_plugin_polars/src/dataframe/series/shift.rs index 556b3361c1..c37ba2f2e9 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/shift.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/shift.rs @@ -92,7 +92,7 @@ impl PluginCommand for Shift { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let lazy = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command_lazy(plugin, engine, call, lazy).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/series/unique.rs b/crates/nu_plugin_polars/src/dataframe/series/unique.rs index 47efd880a6..2475ad026a 100644 --- a/crates/nu_plugin_polars/src/dataframe/series/unique.rs +++ b/crates/nu_plugin_polars/src/dataframe/series/unique.rs @@ -134,7 +134,7 @@ impl PluginCommand for Unique { call: &EvaluatedCall, input: PipelineData, ) -> Result { - let value = input.into_value(call.head); + let value = input.into_value(call.head)?; let df = NuLazyFrame::try_from_value_coerce(plugin, &value)?; command_lazy(plugin, engine, call, df).map_err(LabeledError::from) } diff --git a/crates/nu_plugin_polars/src/dataframe/values/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/mod.rs index a43c8f2412..179c85bf36 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/mod.rs @@ -84,7 +84,7 @@ impl PolarsPluginObject { input: PipelineData, span: Span, ) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(plugin, &value) } @@ -242,7 +242,7 @@ pub trait PolarsPluginCustomValue: CustomValue { /// Handles the ability for a PolarsObjectType implementations to convert between /// their respective CustValue type. /// PolarsPluginObjectType's (NuDataFrame, NuLazyFrame) should -/// implement this trait. +/// implement this trait. 
pub trait CustomValueSupport: Cacheable { type CV: PolarsPluginCustomValue + CustomValue + 'static; @@ -301,7 +301,7 @@ pub trait CustomValueSupport: Cacheable { input: PipelineData, span: Span, ) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value(plugin, &value) } diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs index 30a5ea691d..46132133dd 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/mod.rs @@ -519,7 +519,7 @@ impl NuDataFrame { input: PipelineData, span: Span, ) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value_coerce(plugin, &value, span) } } diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs index f3c969b03d..48e296e95e 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_lazyframe/mod.rs @@ -109,7 +109,7 @@ impl NuLazyFrame { input: PipelineData, span: Span, ) -> Result { - let value = input.into_value(span); + let value = input.into_value(span)?; Self::try_from_value_coerce(plugin, &value) } } diff --git a/src/main.rs b/src/main.rs index db0c80d4f2..d0fc023b68 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,15 +25,13 @@ use nu_cmd_base::util::get_init_cwd; use nu_lsp::LanguageServer; use nu_path::canonicalize_with; use nu_protocol::{ - engine::EngineState, report_error_new, util::BufferedReader, PipelineData, RawStream, - ShellError, Span, Value, + engine::EngineState, report_error_new, ByteStream, PipelineData, ShellError, Span, Value, }; use nu_std::load_standard_library; use nu_utils::utils::perf; use run::{run_commands, run_file, run_repl}; use signals::ctrlc_protection; use std::{ - io::BufReader, path::PathBuf, str::FromStr, sync::{atomic::AtomicBool, Arc}, @@ -345,22 +343,7 @@ fn main() -> Result<()> { start_time = std::time::Instant::now(); let input = if let Some(redirect_stdin) = &parsed_nu_cli_args.redirect_stdin { trace!("redirecting stdin"); - let stdin = std::io::stdin(); - let buf_reader = BufReader::new(stdin); - - PipelineData::ExternalStream { - stdout: Some(RawStream::new( - Box::new(BufferedReader::new(buf_reader)), - Some(ctrlc.clone()), - redirect_stdin.span, - None, - )), - stderr: None, - exit_code: None, - span: redirect_stdin.span, - metadata: None, - trim_end_newline: false, - } + PipelineData::ByteStream(ByteStream::stdin(redirect_stdin.span)?, None) } else { trace!("not redirecting stdin"); PipelineData::empty() @@ -450,7 +433,7 @@ fn main() -> Result<()> { ); } - LanguageServer::initialize_stdio_connection()?.serve_requests(engine_state, ctrlc) + LanguageServer::initialize_stdio_connection()?.serve_requests(engine_state, ctrlc)? } else if let Some(commands) = parsed_nu_cli_args.commands.clone() { run_commands( &mut engine_state, @@ -460,7 +443,6 @@ fn main() -> Result<()> { input, entire_start_time, ); - Ok(()) } else if !script_name.is_empty() { run_file( &mut engine_state, @@ -470,8 +452,9 @@ fn main() -> Result<()> { args_to_script, input, ); - Ok(()) } else { - run_repl(&mut engine_state, parsed_nu_cli_args, entire_start_time) + run_repl(&mut engine_state, parsed_nu_cli_args, entire_start_time)? 
} + + Ok(()) } diff --git a/tests/plugins/stream.rs b/tests/plugins/stream.rs index 8530e5bc32..b8771580f7 100644 --- a/tests/plugins/stream.rs +++ b/tests/plugins/stream.rs @@ -119,40 +119,40 @@ fn sum_big_stream() { } #[test] -fn collect_external_accepts_list_of_string() { +fn collect_bytes_accepts_list_of_string() { let actual = nu_with_plugins!( cwd: "tests/fixtures/formats", plugin: ("nu_plugin_example"), - "[a b] | example collect-external" + "[a b] | example collect-bytes" ); assert_eq!(actual.out, "ab"); } #[test] -fn collect_external_accepts_list_of_binary() { +fn collect_bytes_accepts_list_of_binary() { let actual = nu_with_plugins!( cwd: "tests/fixtures/formats", plugin: ("nu_plugin_example"), - "[0x[41] 0x[42]] | example collect-external" + "[0x[41] 0x[42]] | example collect-bytes" ); assert_eq!(actual.out, "AB"); } #[test] -fn collect_external_produces_raw_input() { +fn collect_bytes_produces_byte_stream() { let actual = nu_with_plugins!( cwd: "tests/fixtures/formats", plugin: ("nu_plugin_example"), - "[a b c] | example collect-external | describe" + "[a b c] | example collect-bytes | describe" ); - assert_eq!(actual.out, "raw input"); + assert_eq!(actual.out, "byte stream"); } #[test] -fn collect_external_big_stream() { +fn collect_bytes_big_stream() { // This in particular helps to ensure that a big stream can be both read and written at the same // time without deadlocking let actual = nu_with_plugins!( @@ -160,9 +160,8 @@ fn collect_external_big_stream() { plugin: ("nu_plugin_example"), r#"( seq 1 10000 | - to text | - each { into string } | - example collect-external | + each {|i| ($i | into string) ++ (char newline) } | + example collect-bytes | lines | length )"# diff --git a/tests/shell/pipeline/commands/internal.rs b/tests/shell/pipeline/commands/internal.rs index 7bc75b07ad..6c2226ff65 100644 --- a/tests/shell/pipeline/commands/internal.rs +++ b/tests/shell/pipeline/commands/internal.rs @@ -1131,13 +1131,13 @@ fn pipe_input_to_print() { #[test] fn err_pipe_input_to_print() { let actual = nu!(r#""foo" e>| print"#); - assert!(actual.err.contains("only works on external streams")); + assert!(actual.err.contains("only works on external commands")); } #[test] fn outerr_pipe_input_to_print() { let actual = nu!(r#""foo" o+e>| print"#); - assert!(actual.err.contains("only works on external streams")); + assert!(actual.err.contains("only works on external commands")); } #[test] From 1c00a6ca5eea783dc2c67c60e75cc713130b5cfd Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 16 May 2024 15:26:03 -0500 Subject: [PATCH 042/137] sync up with reedline changes (#12881) # Description sync-up nushell to reedline's latest minor changes. Not quite sure why itertools downgraded to 0.11.0 when nushell and reedline have it set to 0.12.0. 
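A likely answer to the `itertools` question above, judging from the `Cargo.lock` hunk below: the 0.11.0 entry appears under `bindgen`'s dependency list, not under nushell's or reedline's, so the change comes from how `bindgen`'s dependencies resolved rather than from either of those crates. Something like `cargo tree -i itertools@0.11.0` should confirm which crates pull it in.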
--- Cargo.lock | 5 ++--- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a9b95cebd..ad5a1e3a76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -377,7 +377,7 @@ dependencies = [ "bitflags 2.5.0", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.11.0", "lazy_static", "lazycell", "proc-macro2", @@ -4843,8 +4843,7 @@ dependencies = [ [[package]] name = "reedline" version = "0.32.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf59e4c97b5049ba96b052cdb652368305a2eddcbce9bf1c16f9d003139eeea" +source = "git+https://github.com/nushell/reedline?branch=main#a580ea56d4e5a889468b2969d2a1534379504ab6" dependencies = [ "arboard", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 1366ecbdd8..2e9c7e0b0f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -305,7 +305,7 @@ bench = false # To use a development version of a dependency please use a global override here # changing versions in each sub-crate of the workspace is tedious [patch.crates-io] -# reedline = { git = "https://github.com/nushell/reedline", branch = "main" } +reedline = { git = "https://github.com/nushell/reedline", branch = "main" } # nu-ansi-term = {git = "https://github.com/nushell/nu-ansi-term.git", branch = "main"} # Run all benchmarks with `cargo bench` From 2a09dccc11937c8255d10fa91ed6547f5e234696 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 16 May 2024 21:15:20 +0000 Subject: [PATCH 043/137] `Bytestream` touchup (#12886) # Description Adds some docs and a small fix to `Chunks`. --- .../nu-protocol/src/pipeline/byte_stream.rs | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/crates/nu-protocol/src/pipeline/byte_stream.rs b/crates/nu-protocol/src/pipeline/byte_stream.rs index f57aecacba..64b566a625 100644 --- a/crates/nu-protocol/src/pipeline/byte_stream.rs +++ b/crates/nu-protocol/src/pipeline/byte_stream.rs @@ -59,6 +59,13 @@ impl Read for SourceReader { /// A potentially infinite, interruptible stream of bytes. /// +/// To create a [`ByteStream`], you can use any of the following methods: +/// - [`read`](ByteStream::read): takes any type that implements [`Read`]. +/// - [`file`](ByteStream::file): takes a [`File`]. +/// - [`from_iter`](ByteStream::from_iter): takes an [`Iterator`] whose items implement `AsRef<[u8]>`. +/// - [`from_result_iter`](ByteStream::from_result_iter): same as [`from_iter`](ByteStream::from_iter), +/// but each item is a `Result`. +/// /// The data of a [`ByteStream`] can be accessed using one of the following methods: /// - [`reader`](ByteStream::reader): returns a [`Read`]-able type to get the raw bytes in the stream. 
/// - [`lines`](ByteStream::lines): splits the bytes on lines and returns an [`Iterator`] @@ -626,14 +633,18 @@ impl Iterator for Chunks { if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { None } else { - match self.reader.fill_buf() { - Ok(buf) => { - self.leftover.extend_from_slice(buf); - let len = buf.len(); - self.reader.consume(len); - } - Err(err) => return Some(Err(err.into_spanned(self.span).into())), - }; + loop { + match self.reader.fill_buf() { + Ok(buf) => { + self.leftover.extend_from_slice(buf); + let len = buf.len(); + self.reader.consume(len); + break; + } + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(err) => return Some(Err(err.into_spanned(self.span).into())), + }; + } if self.leftover.is_empty() { return None; From aec41f3df0561a43c89163174842241866b0fa19 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 16 May 2024 22:34:49 +0000 Subject: [PATCH 044/137] Add `Span` merging functions (#12511) # Description This PR adds a few functions to `Span` for merging spans together: - `Span::append`: merges two spans that are known to be in order. - `Span::concat`: returns a span that encompasses all the spans in a slice. The spans must be in order. - `Span::merge`: merges two spans (no order necessary). - `Span::merge_many`: merges an iterator of spans into a single span (no order necessary). These are meant to replace the free-standing `nu_protocol::span` function. The spans in a `LiteCommand` (the `parts`) should always be in order based on the lite parser and lexer. So, the parser code sees the most usage of `Span::append` and `Span::concat` where the order is known. In other code areas, `Span::merge` and `Span::merge_many` are used since the order between spans is often not known. --- .../values/nu_dataframe/between_values.rs | 6 +- .../src/dataframe/values/utils.rs | 6 +- crates/nu-command/src/help/help_.rs | 4 +- crates/nu-command/src/help/help_aliases.rs | 5 +- crates/nu-command/src/help/help_commands.rs | 3 +- crates/nu-command/src/help/help_externs.rs | 3 +- crates/nu-command/src/help/help_modules.rs | 4 +- crates/nu-command/src/platform/is_terminal.rs | 6 +- crates/nu-command/src/platform/kill.rs | 5 +- crates/nu-parser/src/parse_keywords.rs | 263 ++++++++++-------- crates/nu-parser/src/parser.rs | 64 ++--- crates/nu-protocol/src/ast/import_pattern.rs | 35 ++- crates/nu-protocol/src/span.rs | 123 +++++--- .../values/nu_dataframe/between_values.rs | 9 +- .../src/dataframe/values/utils.rs | 10 +- 15 files changed, 305 insertions(+), 241 deletions(-) diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs index fab201cf9a..74a484825a 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs @@ -1,7 +1,7 @@ use super::{operations::Axis, NuDataFrame}; use nu_protocol::{ ast::{Boolean, Comparison, Math, Operator}, - span, ShellError, Span, Spanned, Value, + ShellError, Span, Spanned, Value, }; use num::Zero; use polars::prelude::{ @@ -17,7 +17,7 @@ pub(super) fn between_dataframes( right: &Value, rhs: &NuDataFrame, ) -> Result { - let operation_span = span(&[left.span(), right.span()]); + let operation_span = Span::merge(left.span(), right.span()); match operator.item { Operator::Math(Math::Plus) => match lhs.append_df(rhs, Axis::Row, operation_span) { Ok(df) => Ok(df.into_value(operation_span)), @@ -40,7 +40,7 @@ pub(super) fn 
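(Editorial aside: to make the ordering requirements from the PATCH 044 description concrete, here is a simplified standalone sketch of the four merging operations. This is not the real `nu_protocol::Span`; only start/end offsets are modeled, and the empty-input behavior of `merge_many` is arbitrary here.)

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct Span {
    start: usize,
    end: usize,
}

impl Span {
    fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Merge two spans regardless of their order.
    fn merge(self, other: Self) -> Self {
        Self::new(self.start.min(other.start), self.end.max(other.end))
    }

    /// Merge two spans that are already known to be in order.
    fn append(self, other: Self) -> Self {
        debug_assert!(self.start <= other.start && self.end <= other.end);
        Self::new(self.start, other.end)
    }

    /// Merge a slice of spans that are already in order.
    fn concat(spans: &[Self]) -> Self {
        debug_assert!(!spans.is_empty());
        Self::new(spans[0].start, spans[spans.len() - 1].end)
    }

    /// Merge any iterator of spans, in any order.
    fn merge_many(spans: impl Iterator<Item = Self>) -> Option<Self> {
        spans.reduce(Self::merge)
    }
}

fn main() {
    let a = Span::new(0, 3);
    let b = Span::new(5, 9);
    assert_eq!(a.merge(b), b.merge(a)); // order does not matter
    assert_eq!(a.append(b), Span::new(0, 9)); // order assumed
    assert_eq!(Span::concat(&[a, b]), Span::new(0, 9));
    assert_eq!(Span::merge_many([b, a].into_iter()), Some(Span::new(0, 9)));
}
```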
compute_between_series( right: &Value, rhs: &Series, ) -> Result { - let operation_span = span(&[left.span(), right.span()]); + let operation_span = Span::merge(left.span(), right.span()); match operator.item { Operator::Math(Math::Plus) => { let mut res = lhs + rhs; diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs b/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs index 3ccf2e6d77..0dc43399a3 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs @@ -1,4 +1,4 @@ -use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value}; +use nu_protocol::{ShellError, Span, Spanned, Value}; // Default value used when selecting rows from dataframe pub const DEFAULT_ROWS: usize = 5; @@ -27,7 +27,7 @@ pub(crate) fn convert_columns( let span = value.span(); match value { Value::String { val, .. } => { - col_span = span_join(&[col_span, span]); + col_span = col_span.merge(span); Ok(Spanned { item: val, span }) } _ => Err(ShellError::GenericError { @@ -68,7 +68,7 @@ pub(crate) fn convert_columns_string( let span = value.span(); match value { Value::String { val, .. } => { - col_span = span_join(&[col_span, span]); + col_span = col_span.merge(span); Ok(val) } _ => Err(ShellError::GenericError { diff --git a/crates/nu-command/src/help/help_.rs b/crates/nu-command/src/help/help_.rs index 819d3c7dd9..9835a20ad0 100644 --- a/crates/nu-command/src/help/help_.rs +++ b/crates/nu-command/src/help/help_.rs @@ -2,7 +2,6 @@ use crate::help::{help_aliases, help_commands, help_modules}; use fancy_regex::Regex; use nu_ansi_term::Style; use nu_engine::command_prelude::*; -use nu_protocol::span; use nu_utils::IgnoreCaseExt; #[derive(Clone)] @@ -97,9 +96,8 @@ You can also learn more at https://www.nushell.sh/book/"#; span: _, }) = result { - let rest_spans: Vec = rest.iter().map(|arg| arg.span).collect(); Err(ShellError::NotFound { - span: span(&rest_spans), + span: Span::merge_many(rest.iter().map(|s| s.span)), }) } else { result diff --git a/crates/nu-command/src/help/help_aliases.rs b/crates/nu-command/src/help/help_aliases.rs index 2cc7c7f073..da03fa6398 100644 --- a/crates/nu-command/src/help/help_aliases.rs +++ b/crates/nu-command/src/help/help_aliases.rs @@ -1,7 +1,6 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, scope::ScopeData}; -use nu_protocol::span; #[derive(Clone)] pub struct HelpAliases; @@ -110,13 +109,13 @@ pub fn help_aliases( let Some(alias) = engine_state.find_decl(name.as_bytes(), &[]) else { return Err(ShellError::AliasNotFound { - span: span(&rest.iter().map(|r| r.span).collect::>()), + span: Span::merge_many(rest.iter().map(|s| s.span)), }); }; let Some(alias) = engine_state.get_decl(alias).as_alias() else { return Err(ShellError::AliasNotFound { - span: span(&rest.iter().map(|r| r.span).collect::>()), + span: Span::merge_many(rest.iter().map(|s| s.span)), }); }; diff --git a/crates/nu-command/src/help/help_commands.rs b/crates/nu-command/src/help/help_commands.rs index bc508b249b..bc0fd92d92 100644 --- a/crates/nu-command/src/help/help_commands.rs +++ b/crates/nu-command/src/help/help_commands.rs @@ -1,7 +1,6 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, get_full_help}; -use nu_protocol::span; #[derive(Clone)] pub struct HelpCommands; @@ -104,7 +103,7 @@ pub fn help_commands( ) } else { Err(ShellError::CommandNotFound { - span: span(&[rest[0].span, 
rest[rest.len() - 1].span]), + span: Span::merge_many(rest.iter().map(|s| s.span)), }) } } diff --git a/crates/nu-command/src/help/help_externs.rs b/crates/nu-command/src/help/help_externs.rs index 624a8d8060..22fb4a303c 100644 --- a/crates/nu-command/src/help/help_externs.rs +++ b/crates/nu-command/src/help/help_externs.rs @@ -1,7 +1,6 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, get_full_help, scope::ScopeData}; -use nu_protocol::span; #[derive(Clone)] pub struct HelpExterns; @@ -124,7 +123,7 @@ pub fn help_externs( ) } else { Err(ShellError::CommandNotFound { - span: span(&[rest[0].span, rest[rest.len() - 1].span]), + span: Span::merge_many(rest.iter().map(|s| s.span)), }) } } diff --git a/crates/nu-command/src/help/help_modules.rs b/crates/nu-command/src/help/help_modules.rs index f2ddf55f1d..690968251b 100644 --- a/crates/nu-command/src/help/help_modules.rs +++ b/crates/nu-command/src/help/help_modules.rs @@ -1,7 +1,7 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, scope::ScopeData}; -use nu_protocol::{span, DeclId}; +use nu_protocol::DeclId; #[derive(Clone)] pub struct HelpModules; @@ -117,7 +117,7 @@ pub fn help_modules( let Some(module_id) = engine_state.find_module(name.as_bytes(), &[]) else { return Err(ShellError::ModuleNotFoundAtRuntime { mod_name: name, - span: span(&rest.iter().map(|r| r.span).collect::>()), + span: Span::merge_many(rest.iter().map(|s| s.span)), }); }; diff --git a/crates/nu-command/src/platform/is_terminal.rs b/crates/nu-command/src/platform/is_terminal.rs index 770fa45289..c67329e839 100644 --- a/crates/nu-command/src/platform/is_terminal.rs +++ b/crates/nu-command/src/platform/is_terminal.rs @@ -1,5 +1,4 @@ use nu_engine::command_prelude::*; -use nu_protocol::span; use std::io::IsTerminal as _; #[derive(Clone)] @@ -57,12 +56,9 @@ impl Command for IsTerminal { }); } _ => { - let spans: Vec<_> = call.arguments.iter().map(|arg| arg.span()).collect(); - let span = span(&spans); - return Err(ShellError::IncompatibleParametersSingle { msg: "Only one stream may be checked".into(), - span, + span: Span::merge_many(call.arguments.iter().map(|arg| arg.span())), }); } }; diff --git a/crates/nu-command/src/platform/kill.rs b/crates/nu-command/src/platform/kill.rs index 59486cf1ad..2e47ee8c78 100644 --- a/crates/nu-command/src/platform/kill.rs +++ b/crates/nu-command/src/platform/kill.rs @@ -1,5 +1,4 @@ use nu_engine::command_prelude::*; -use nu_protocol::span; use std::process::{Command as CommandSys, Stdio}; #[derive(Clone)] @@ -96,7 +95,7 @@ impl Command for Kill { })? .span, right_message: "signal".to_string(), - right_span: span(&[ + right_span: Span::merge( call.get_named_arg("signal") .ok_or_else(|| ShellError::GenericError { error: "Flag error".into(), @@ -107,7 +106,7 @@ impl Command for Kill { })? 
.span, signal_span, - ]), + ), }); } cmd.arg("-9"); diff --git a/crates/nu-parser/src/parse_keywords.rs b/crates/nu-parser/src/parse_keywords.rs index 381a86d19e..015873e69a 100644 --- a/crates/nu-parser/src/parse_keywords.rs +++ b/crates/nu-parser/src/parse_keywords.rs @@ -15,8 +15,8 @@ use nu_protocol::{ }, engine::{StateWorkingSet, DEFAULT_OVERLAY_NAME}, eval_const::eval_constant, - span, Alias, BlockId, DeclId, Module, ModuleId, ParseError, PositionalArg, - ResolvedImportPattern, Span, Spanned, SyntaxShape, Type, Value, VarId, + Alias, BlockId, DeclId, Module, ModuleId, ParseError, PositionalArg, ResolvedImportPattern, + Span, Spanned, SyntaxShape, Type, Value, VarId, }; use std::{ collections::{HashMap, HashSet}, @@ -77,14 +77,14 @@ pub const UNALIASABLE_PARSER_KEYWORDS: &[&[u8]] = &[ /// Check whether spans start with a parser keyword that can be aliased pub fn is_unaliasable_parser_keyword(working_set: &StateWorkingSet, spans: &[Span]) -> bool { // try two words - if let (Some(span1), Some(span2)) = (spans.first(), spans.get(1)) { - let cmd_name = working_set.get_span_contents(span(&[*span1, *span2])); + if let (Some(&span1), Some(&span2)) = (spans.first(), spans.get(1)) { + let cmd_name = working_set.get_span_contents(Span::append(span1, span2)); return UNALIASABLE_PARSER_KEYWORDS.contains(&cmd_name); } // try one word - if let Some(span1) = spans.first() { - let cmd_name = working_set.get_span_contents(*span1); + if let Some(&span1) = spans.first() { + let cmd_name = working_set.get_span_contents(span1); UNALIASABLE_PARSER_KEYWORDS.contains(&cmd_name) } else { false @@ -254,7 +254,7 @@ pub fn parse_for(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) if working_set.get_span_contents(spans[0]) != b"for" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'for' function".into(), - span(spans), + Span::concat(spans), )); return garbage(spans[0]); } @@ -270,7 +270,7 @@ pub fn parse_for(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) None => { working_set.error(ParseError::UnknownState( "internal error: for declaration not found".into(), - span(spans), + Span::concat(spans), )); return garbage(spans[0]); } @@ -281,7 +281,7 @@ pub fn parse_for(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) working_set.exit_scope(); - let call_span = span(spans); + let call_span = Span::concat(spans); let decl = working_set.get_decl(decl_id); let sig = decl.signature(); @@ -395,7 +395,7 @@ pub fn parse_def( if def_call != b"def" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for def function".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); } @@ -411,7 +411,7 @@ pub fn parse_def( None => { working_set.error(ParseError::UnknownState( "internal error: def declaration not found".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); } @@ -442,8 +442,12 @@ pub fn parse_def( } let starting_error_count = working_set.parse_errors.len(); - let ParsedInternalCall { call, output } = - parse_internal_call(working_set, span(command_spans), rest_spans, decl_id); + let ParsedInternalCall { call, output } = parse_internal_call( + working_set, + Span::concat(command_spans), + rest_spans, + decl_id, + ); // This is to preserve the order of the errors so that // the check errors below come first let mut new_errors = working_set.parse_errors[starting_error_count..].to_vec(); @@ -451,7 +455,7 @@ pub fn parse_def( working_set.exit_scope(); - 
let call_span = span(spans); + let call_span = Span::concat(spans); let decl = working_set.get_decl(decl_id); let sig = decl.signature(); @@ -673,7 +677,7 @@ pub fn parse_extern( if extern_call != b"extern" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for extern command".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -689,7 +693,7 @@ pub fn parse_extern( None => { working_set.error(ParseError::UnknownState( "internal error: def declaration not found".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -709,11 +713,15 @@ pub fn parse_extern( } } - let ParsedInternalCall { call, .. } = - parse_internal_call(working_set, span(command_spans), rest_spans, decl_id); + let ParsedInternalCall { call, .. } = parse_internal_call( + working_set, + Span::concat(command_spans), + rest_spans, + decl_id, + ); working_set.exit_scope(); - let call_span = span(spans); + let call_span = Span::concat(spans); //let decl = working_set.get_decl(decl_id); //let sig = decl.signature(); @@ -824,8 +832,9 @@ fn check_alias_name<'a>(working_set: &mut StateWorkingSet, spans: &'a [Span]) -> None } else if spans.len() < command_len + 3 { if working_set.get_span_contents(spans[command_len]) == b"=" { - let name = - String::from_utf8_lossy(working_set.get_span_contents(span(&spans[..command_len]))); + let name = String::from_utf8_lossy( + working_set.get_span_contents(Span::concat(&spans[..command_len])), + ); working_set.error(ParseError::AssignmentMismatch( format!("{name} missing name"), "missing name".into(), @@ -836,8 +845,9 @@ fn check_alias_name<'a>(working_set: &mut StateWorkingSet, spans: &'a [Span]) -> None } } else if working_set.get_span_contents(spans[command_len + 1]) != b"=" { - let name = - String::from_utf8_lossy(working_set.get_span_contents(span(&spans[..command_len]))); + let name = String::from_utf8_lossy( + working_set.get_span_contents(Span::concat(&spans[..command_len])), + ); working_set.error(ParseError::AssignmentMismatch( format!("{name} missing sign"), "missing equal sign".into(), @@ -868,7 +878,7 @@ pub fn parse_alias( if name != b"alias" { working_set.error(ParseError::InternalError( "Alias statement unparsable".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -890,7 +900,12 @@ pub fn parse_alias( call: alias_call, output, .. 
- } = parse_internal_call(working_set, span(command_spans), rest_spans, decl_id); + } = parse_internal_call( + working_set, + Span::concat(command_spans), + rest_spans, + decl_id, + ); working_set .parse_errors @@ -902,7 +917,7 @@ pub fn parse_alias( let alias_pipeline = Pipeline::from_vec(vec![Expression { expr: Expr::Call(alias_call.clone()), - span: span(spans), + span: Span::concat(spans), ty: output, custom_completion: None, }]); @@ -914,7 +929,7 @@ pub fn parse_alias( let Some(alias_name_expr) = alias_call.positional_nth(0) else { working_set.error(ParseError::UnknownState( "Missing positional after call check".to_string(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); }; @@ -1090,7 +1105,7 @@ pub fn parse_alias( } else if spans.len() < 4 { working_set.error(ParseError::IncorrectValue( "Incomplete alias".into(), - span(&spans[..split_id]), + Span::concat(&spans[..split_id]), "incomplete alias".into(), )); } @@ -1100,7 +1115,7 @@ pub fn parse_alias( working_set.error(ParseError::InternalError( "Alias statement unparsable".into(), - span(spans), + Span::concat(spans), )); garbage_pipeline(spans) @@ -1111,7 +1126,7 @@ pub fn parse_export_in_block( working_set: &mut StateWorkingSet, lite_command: &LiteCommand, ) -> Pipeline { - let call_span = span(&lite_command.parts); + let call_span = Span::concat(&lite_command.parts); let full_name = if lite_command.parts.len() > 1 { let sub = working_set.get_span_contents(lite_command.parts[1]); @@ -1139,7 +1154,7 @@ pub fn parse_export_in_block( if full_name == "export" { lite_command.parts[0] } else { - span(&lite_command.parts[0..2]) + Span::concat(&lite_command.parts[0..2]) }, if full_name == "export" { &lite_command.parts[1..] @@ -1169,7 +1184,7 @@ pub fn parse_export_in_block( } else { working_set.error(ParseError::UnknownState( format!("internal error: '{full_name}' declaration not found",), - span(&lite_command.parts), + Span::concat(&lite_command.parts), )); return garbage_pipeline(&lite_command.parts); }; @@ -1213,7 +1228,7 @@ pub fn parse_export_in_module( if working_set.get_span_contents(*sp) != b"export" { working_set.error(ParseError::UnknownState( "expected export statement".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), vec![]); } @@ -1222,7 +1237,7 @@ pub fn parse_export_in_module( } else { working_set.error(ParseError::UnknownState( "got empty input for parsing export statement".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), vec![]); }; @@ -1280,12 +1295,12 @@ pub fn parse_export_in_module( if let Some(Expr::Call(def_call)) = pipeline.elements.first().map(|e| &e.expr.expr) { call.clone_from(def_call); - call.head = span(&spans[0..=1]); + call.head = Span::concat(&spans[0..=1]); call.decl_id = export_def_decl_id; } else { working_set.error(ParseError::InternalError( "unexpected output from parsing a definition".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); }; @@ -1316,12 +1331,12 @@ pub fn parse_export_in_module( if let Some(Expr::Call(def_call)) = pipeline.elements.first().map(|e| &e.expr.expr) { call.clone_from(def_call); - call.head = span(&spans[0..=1]); + call.head = Span::concat(&spans[0..=1]); call.decl_id = export_def_decl_id; } else { working_set.error(ParseError::InternalError( "unexpected output from parsing a definition".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); }; @@ -1341,7 +1356,7 @@ pub fn parse_export_in_module( } else { working_set.error(ParseError::InternalError( "failed to find 
added declaration".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); } @@ -1373,12 +1388,12 @@ pub fn parse_export_in_module( { call.clone_from(alias_call); - call.head = span(&spans[0..=1]); + call.head = Span::concat(&spans[0..=1]); call.decl_id = export_alias_decl_id; } else { working_set.error(ParseError::InternalError( "unexpected output from parsing a definition".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); }; @@ -1398,7 +1413,7 @@ pub fn parse_export_in_module( } else { working_set.error(ParseError::InternalError( "failed to find added alias".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); } @@ -1428,12 +1443,12 @@ pub fn parse_export_in_module( { call.clone_from(use_call); - call.head = span(&spans[0..=1]); + call.head = Span::concat(&spans[0..=1]); call.decl_id = export_use_decl_id; } else { working_set.error(ParseError::InternalError( "unexpected output from parsing a definition".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); }; @@ -1460,12 +1475,12 @@ pub fn parse_export_in_module( { call.clone_from(module_call); - call.head = span(&spans[0..=1]); + call.head = Span::concat(&spans[0..=1]); call.decl_id = export_module_decl_id; } else { working_set.error(ParseError::InternalError( "unexpected output from parsing a definition".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); }; @@ -1486,7 +1501,7 @@ pub fn parse_export_in_module( "failed to find added module '{}'", String::from_utf8_lossy(module_name) ), - span(&spans[1..]), + Span::concat(&spans[1..]), )); } } @@ -1511,12 +1526,12 @@ pub fn parse_export_in_module( { call.clone_from(def_call); - call.head = span(&spans[0..=1]); + call.head = Span::concat(&spans[0..=1]); call.decl_id = export_const_decl_id; } else { working_set.error(ParseError::InternalError( "unexpected output from parsing a definition".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); }; @@ -1538,7 +1553,7 @@ pub fn parse_export_in_module( } else { working_set.error(ParseError::InternalError( "failed to find added variable".into(), - span(&spans[1..]), + Span::concat(&spans[1..]), )); } } @@ -1567,7 +1582,7 @@ pub fn parse_export_in_module( ( Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]), @@ -1582,7 +1597,7 @@ pub fn parse_export_env( if !spans.is_empty() && working_set.get_span_contents(spans[0]) != b"export-env" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'export-env' command".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); } @@ -1590,7 +1605,7 @@ pub fn parse_export_env( if spans.len() < 2 { working_set.error(ParseError::MissingPositional( "block".into(), - span(spans), + Span::concat(spans), "export-env ".into(), )); return (garbage_pipeline(spans), None); @@ -1602,7 +1617,7 @@ pub fn parse_export_env( parse_internal_call(working_set, spans[0], &[spans[1]], decl_id); let decl = working_set.get_decl(decl_id); - let call_span = span(spans); + let call_span = Span::concat(spans); let starting_error_count = working_set.parse_errors.len(); check_call(working_set, call_span, &decl.signature(), &call); @@ -1628,7 +1643,7 @@ pub fn parse_export_env( None => { working_set.error(ParseError::UnknownState( "internal error: 'export-env' declaration not found".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); } @@ -1647,14 +1662,14 @@ pub fn parse_export_env( } 
else { working_set.error(ParseError::UnknownState( "internal error: 'export-env' block is missing".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); }; let pipeline = Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]); @@ -2050,11 +2065,15 @@ pub fn parse_module( Some(decl_id) => { let (command_spans, rest_spans) = spans.split_at(split_id); - let ParsedInternalCall { call, output } = - parse_internal_call(working_set, span(command_spans), rest_spans, decl_id); + let ParsedInternalCall { call, output } = parse_internal_call( + working_set, + Span::concat(command_spans), + rest_spans, + decl_id, + ); let decl = working_set.get_decl(decl_id); - let call_span = span(spans); + let call_span = Span::concat(spans); let starting_error_count = working_set.parse_errors.len(); check_call(working_set, call_span, &decl.signature(), &call); @@ -2080,7 +2099,7 @@ pub fn parse_module( None => { working_set.error(ParseError::UnknownState( "internal error: 'module' or 'export module' declaration not found".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); } @@ -2112,14 +2131,14 @@ pub fn parse_module( } else { working_set.error(ParseError::UnknownState( "internal error: name not a string".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); } } else { working_set.error(ParseError::UnknownState( "internal error: missing positional".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); }; @@ -2151,7 +2170,7 @@ pub fn parse_module( if spans.len() < split_id + 2 { working_set.error(ParseError::UnknownState( "Expected structure: module or module ".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), None); @@ -2199,7 +2218,7 @@ pub fn parse_module( .expect("internal error: missing module command"); let call = Box::new(Call { - head: span(&spans[..split_id]), + head: Span::concat(&spans[..split_id]), decl_id: module_decl_id, arguments: vec![ Argument::Positional(module_name_or_path_expr), @@ -2211,7 +2230,7 @@ pub fn parse_module( ( Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]), @@ -2236,7 +2255,7 @@ pub fn parse_use( if use_call != b"use" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'use' command".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), vec![]); } @@ -2244,7 +2263,7 @@ pub fn parse_use( if working_set.get_span_contents(name_span) != b"use" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'use' command".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), vec![]); } @@ -2258,11 +2277,15 @@ pub fn parse_use( Some(decl_id) => { let (command_spans, rest_spans) = spans.split_at(split_id); - let ParsedInternalCall { call, output } = - parse_internal_call(working_set, span(command_spans), rest_spans, decl_id); + let ParsedInternalCall { call, output } = parse_internal_call( + working_set, + Span::concat(command_spans), + rest_spans, + decl_id, + ); let decl = working_set.get_decl(decl_id); - let call_span = span(spans); + let call_span = Span::concat(spans); let starting_error_count = working_set.parse_errors.len(); check_call(working_set, call_span, &decl.signature(), &call); @@ -2288,7 
+2311,7 @@ pub fn parse_use( None => { working_set.error(ParseError::UnknownState( "internal error: 'use' declaration not found".into(), - span(spans), + Span::concat(spans), )); return (garbage_pipeline(spans), vec![]); } @@ -2418,7 +2441,7 @@ pub fn parse_use( // Create a new Use command call to pass the import pattern as parser info let import_pattern_expr = Expression { expr: Expr::ImportPattern(Box::new(import_pattern)), - span: span(args_spans), + span: Span::concat(args_spans), ty: Type::Any, custom_completion: None, }; @@ -2429,7 +2452,7 @@ pub fn parse_use( ( Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]), @@ -2443,7 +2466,7 @@ pub fn parse_hide(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) if working_set.get_span_contents(spans[0]) != b"hide" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'hide' command".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -2458,7 +2481,7 @@ pub fn parse_hide(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) parse_internal_call(working_set, spans[0], &spans[1..], decl_id); let decl = working_set.get_decl(decl_id); - let call_span = span(spans); + let call_span = Span::concat(spans); let starting_error_count = working_set.parse_errors.len(); check_call(working_set, call_span, &decl.signature(), &call); @@ -2481,7 +2504,7 @@ pub fn parse_hide(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) None => { working_set.error(ParseError::UnknownState( "internal error: 'hide' declaration not found".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -2602,7 +2625,7 @@ pub fn parse_hide(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) // Create a new Use command call to pass the new import pattern let import_pattern_expr = Expression { expr: Expr::ImportPattern(Box::new(import_pattern)), - span: span(args_spans), + span: Span::concat(args_spans), ty: Type::Any, custom_completion: None, }; @@ -2612,14 +2635,14 @@ pub fn parse_hide(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]) } else { working_set.error(ParseError::UnknownState( "Expected structure: hide ".into(), - span(spans), + Span::concat(spans), )); garbage_pipeline(spans) } @@ -2975,7 +2998,7 @@ pub fn parse_let(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline // let x = 'f', = at least start from index 2 if item == b"=" && spans.len() > (span.0 + 1) && span.0 > 1 { let (tokens, parse_error) = lex( - working_set.get_span_contents(nu_protocol::span(&spans[(span.0 + 1)..])), + working_set.get_span_contents(Span::concat(&spans[(span.0 + 1)..])), spans[span.0 + 1].start, &[], &[], @@ -2986,7 +3009,7 @@ pub fn parse_let(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline working_set.error(parse_error) } - let rvalue_span = nu_protocol::span(&spans[(span.0 + 1)..]); + let rvalue_span = Span::concat(&spans[(span.0 + 1)..]); let rvalue_block = parse_block(working_set, &tokens, rvalue_span, false, true); let output_type = rvalue_block.output_type(); @@ -3025,7 +3048,7 @@ pub fn parse_let(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline working_set.error(ParseError::TypeMismatch( explicit_type.clone(), rhs_type.clone(), - 
nu_protocol::span(&spans[(span.0 + 1)..]), + Span::concat(&spans[(span.0 + 1)..]), )); } } @@ -3045,7 +3068,7 @@ pub fn parse_let(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: nu_protocol::span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]); @@ -3057,20 +3080,20 @@ pub fn parse_let(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: nu_protocol::span(spans), + span: Span::concat(spans), ty: output, custom_completion: None, }]); } else { working_set.error(ParseError::UnknownState( "internal error: let or const statements not found in core language".into(), - span(spans), + Span::concat(spans), )) } working_set.error(ParseError::UnknownState( "internal error: let or const statement unparsable".into(), - span(spans), + Span::concat(spans), )); garbage_pipeline(spans) @@ -3134,7 +3157,7 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin working_set.error(ParseError::TypeMismatch( explicit_type.clone(), rhs_type.clone(), - nu_protocol::span(&spans[(span.0 + 1)..]), + Span::concat(&spans[(span.0 + 1)..]), )); } } @@ -3155,7 +3178,7 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin working_set.error(ParseError::TypeMismatch( explicit_type.clone(), const_type.clone(), - nu_protocol::span(&spans[(span.0 + 1)..]), + Span::concat(&spans[(span.0 + 1)..]), )); } let val_span = value.span(); @@ -3191,7 +3214,7 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: nu_protocol::span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]); @@ -3203,20 +3226,20 @@ pub fn parse_const(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipelin return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: nu_protocol::span(spans), + span: Span::concat(spans), ty: output, custom_completion: None, }]); } else { working_set.error(ParseError::UnknownState( "internal error: let or const statements not found in core language".into(), - span(spans), + Span::concat(spans), )) } working_set.error(ParseError::UnknownState( "internal error: let or const statement unparsable".into(), - span(spans), + Span::concat(spans), )); garbage_pipeline(spans) @@ -3239,7 +3262,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline // mut x = 'f', = at least start from index 2 if item == b"=" && spans.len() > (span.0 + 1) && span.0 > 1 { let (tokens, parse_error) = lex( - working_set.get_span_contents(nu_protocol::span(&spans[(span.0 + 1)..])), + working_set.get_span_contents(Span::concat(&spans[(span.0 + 1)..])), spans[span.0 + 1].start, &[], &[], @@ -3250,7 +3273,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline working_set.error(parse_error); } - let rvalue_span = nu_protocol::span(&spans[(span.0 + 1)..]); + let rvalue_span = Span::concat(&spans[(span.0 + 1)..]); let rvalue_block = parse_block(working_set, &tokens, rvalue_span, false, true); let output_type = rvalue_block.output_type(); @@ -3290,7 +3313,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline working_set.error(ParseError::TypeMismatch( explicit_type.clone(), rhs_type.clone(), - nu_protocol::span(&spans[(span.0 + 1)..]), + Span::concat(&spans[(span.0 + 
1)..]), )); } } @@ -3310,7 +3333,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: nu_protocol::span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]); @@ -3322,20 +3345,20 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: nu_protocol::span(spans), + span: Span::concat(spans), ty: output, custom_completion: None, }]); } else { working_set.error(ParseError::UnknownState( "internal error: let or const statements not found in core language".into(), - span(spans), + Span::concat(spans), )) } working_set.error(ParseError::UnknownState( "internal error: let or const statement unparsable".into(), - span(spans), + Span::concat(spans), )); garbage_pipeline(spans) @@ -3375,7 +3398,7 @@ pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteComman if is_help { return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: output, custom_completion: None, }]); @@ -3388,10 +3411,10 @@ pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteComman let val = match eval_constant(working_set, &expr) { Ok(val) => val, Err(err) => { - working_set.error(err.wrap(working_set, span(&spans[1..]))); + working_set.error(err.wrap(working_set, Span::concat(&spans[1..]))); return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(&spans[1..]), + span: Span::concat(&spans[1..]), ty: Type::Any, custom_completion: None, }]); @@ -3401,10 +3424,10 @@ pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteComman let filename = match val.coerce_into_string() { Ok(s) => s, Err(err) => { - working_set.error(err.wrap(working_set, span(&spans[1..]))); + working_set.error(err.wrap(working_set, Span::concat(&spans[1..]))); return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(&spans[1..]), + span: Span::concat(&spans[1..]), ty: Type::Any, custom_completion: None, }]); @@ -3450,7 +3473,7 @@ pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteComman return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call_with_block), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]); @@ -3461,7 +3484,7 @@ pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteComman } return Pipeline::from_vec(vec![Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, }]); @@ -3469,7 +3492,7 @@ pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteComman } working_set.error(ParseError::UnknownState( "internal error: source statement unparsable".into(), - span(spans), + Span::concat(spans), )); garbage_pipeline(spans) } @@ -3480,18 +3503,18 @@ pub fn parse_where_expr(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex if !spans.is_empty() && working_set.get_span_contents(spans[0]) != b"where" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'where' command".into(), - span(spans), + Span::concat(spans), )); - return garbage(span(spans)); + return garbage(Span::concat(spans)); } if spans.len() < 2 { working_set.error(ParseError::MissingPositional( "row condition".into(), - span(spans), + Span::concat(spans), "where ".into(), )); - return 
garbage(span(spans)); + return garbage(Span::concat(spans)); } let call = match working_set.find_decl(b"where") { @@ -3500,13 +3523,13 @@ pub fn parse_where_expr(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex parse_internal_call(working_set, spans[0], &spans[1..], decl_id); let decl = working_set.get_decl(decl_id); - let call_span = span(spans); + let call_span = Span::concat(spans); let starting_error_count = working_set.parse_errors.len(); check_call(working_set, call_span, &decl.signature(), &call); let Ok(is_help) = has_flag_const(working_set, &call, "help") else { - return garbage(span(spans)); + return garbage(Span::concat(spans)); }; if starting_error_count != working_set.parse_errors.len() || is_help { @@ -3523,15 +3546,15 @@ pub fn parse_where_expr(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex None => { working_set.error(ParseError::UnknownState( "internal error: 'where' declaration not found".into(), - span(spans), + Span::concat(spans), )); - return garbage(span(spans)); + return garbage(Span::concat(spans)); } }; Expression { expr: Expr::Call(call), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, } @@ -3574,7 +3597,7 @@ pub fn parse_register(working_set: &mut StateWorkingSet, lite_command: &LiteComm if working_set.get_span_contents(spans[0]) != b"register" { working_set.error(ParseError::UnknownState( "internal error: Wrong call name for 'register' function".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -3590,7 +3613,7 @@ pub fn parse_register(working_set: &mut StateWorkingSet, lite_command: &LiteComm None => { working_set.error(ParseError::UnknownState( "internal error: Register declaration not found".into(), - span(spans), + Span::concat(spans), )); return garbage_pipeline(spans); } @@ -3599,7 +3622,7 @@ pub fn parse_register(working_set: &mut StateWorkingSet, lite_command: &LiteComm parse_internal_call(working_set, spans[0], &spans[1..], decl_id); let decl = working_set.get_decl(decl_id); - let call_span = span(spans); + let call_span = Span::concat(spans); let starting_error_count = working_set.parse_errors.len(); check_call(working_set, call_span, &decl.signature(), &call); diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index 0f72132d10..bd09f5b52a 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -11,7 +11,7 @@ use itertools::Itertools; use log::trace; use nu_engine::DIR_VAR_PARSER_INFO; use nu_protocol::{ - ast::*, engine::StateWorkingSet, eval_const::eval_constant, span, BlockId, DidYouMean, Flag, + ast::*, engine::StateWorkingSet, eval_const::eval_constant, BlockId, DidYouMean, Flag, ParseError, PositionalArg, Signature, Span, Spanned, SyntaxShape, Type, VarId, ENV_VARIABLE_ID, IN_VARIABLE_ID, }; @@ -27,7 +27,7 @@ pub fn garbage(span: Span) -> Expression { } pub fn garbage_pipeline(spans: &[Span]) -> Pipeline { - Pipeline::from_vec(vec![garbage(span(spans))]) + Pipeline::from_vec(vec![garbage(Span::concat(spans))]) } fn is_identifier_byte(b: u8) -> bool { @@ -298,7 +298,7 @@ pub fn parse_external_call(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression { expr: Expr::ExternalCall(head, args), - span: span(spans), + span: Span::concat(spans), ty: Type::Any, custom_completion: None, } @@ -1057,7 +1057,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) if spans.is_empty() { working_set.error(ParseError::UnknownState( "Encountered command with zero 
spans".into(), - span(spans), + Span::concat(spans), )); return garbage(head); } @@ -1119,9 +1119,9 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) working_set.error(ParseError::UnknownState( "Incomplete statement".into(), - span(spans), + Span::concat(spans), )); - return garbage(span(spans)); + return garbage(Span::concat(spans)); } } @@ -1149,7 +1149,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) return Expression { expr: Expr::ExternalCall(head, final_args.into()), - span: span(spans), + span: Span::concat(spans), ty: ty.clone(), custom_completion: *custom_completion, }; @@ -1157,7 +1157,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) trace!("parsing: alias of internal call"); parse_internal_call( working_set, - span(&spans[cmd_start..pos]), + Span::concat(&spans[cmd_start..pos]), &spans[pos..], decl_id, ) @@ -1166,7 +1166,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) trace!("parsing: internal call"); parse_internal_call( working_set, - span(&spans[cmd_start..pos]), + Span::concat(&spans[cmd_start..pos]), &spans[pos..], decl_id, ) @@ -1174,7 +1174,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span) Expression { expr: Expr::Call(parsed_call.call), - span: span(spans), + span: Span::concat(spans), ty: parsed_call.output, custom_completion: None, } @@ -2797,9 +2797,9 @@ pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) - let Some(head_span) = spans.first() else { working_set.error(ParseError::WrongImportPattern( "needs at least one component of import pattern".to_string(), - span(spans), + Span::concat(spans), )); - return garbage(span(spans)); + return garbage(Span::concat(spans)); }; let head_expr = parse_value(working_set, *head_span, &SyntaxShape::Any); @@ -2808,13 +2808,13 @@ pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) - Ok(val) => match val.coerce_into_string() { Ok(s) => (working_set.find_module(s.as_bytes()), s.into_bytes()), Err(err) => { - working_set.error(err.wrap(working_set, span(spans))); - return garbage(span(spans)); + working_set.error(err.wrap(working_set, Span::concat(spans))); + return garbage(Span::concat(spans)); } }, Err(err) => { - working_set.error(err.wrap(working_set, span(spans))); - return garbage(span(spans)); + working_set.error(err.wrap(working_set, Span::concat(spans))); + return garbage(Span::concat(spans)); } }; @@ -2894,7 +2894,7 @@ pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) - working_set.error(ParseError::ExportNotFound(result.span)); return Expression { expr: Expr::ImportPattern(Box::new(import_pattern)), - span: span(spans), + span: Span::concat(spans), ty: Type::List(Box::new(Type::String)), custom_completion: None, }; @@ -2914,7 +2914,7 @@ pub fn parse_import_pattern(working_set: &mut StateWorkingSet, spans: &[Span]) - Expression { expr: Expr::ImportPattern(Box::new(import_pattern)), - span: span(&spans[1..]), + span: Span::concat(&spans[1..]), ty: Type::List(Box::new(Type::String)), custom_completion: None, } @@ -2948,7 +2948,7 @@ pub fn parse_var_with_opt_type( *spans_idx += 1; // signature like record is broken into multiple spans due to // whitespaces. 
Collect the rest into one span and work on it - let full_span = span(&spans[*spans_idx..]); + let full_span = Span::concat(&spans[*spans_idx..]); let type_bytes = working_set.get_span_contents(full_span).to_vec(); let (tokens, parse_error) = @@ -2976,7 +2976,7 @@ pub fn parse_var_with_opt_type( ( Expression { expr: Expr::VarDecl(id), - span: span(&spans[span_beginning..*spans_idx + 1]), + span: Span::concat(&spans[span_beginning..*spans_idx + 1]), ty: ty.clone(), custom_completion: None, }, @@ -3019,7 +3019,7 @@ pub fn parse_var_with_opt_type( let id = working_set.add_variable( var_name, - span(&spans[*spans_idx..*spans_idx + 1]), + Span::concat(&spans[*spans_idx..*spans_idx + 1]), Type::Any, mutable, ); @@ -3067,7 +3067,7 @@ pub fn parse_input_output_types( working_set: &mut StateWorkingSet, spans: &[Span], ) -> Vec<(Type, Type)> { - let mut full_span = span(spans); + let mut full_span = Span::concat(spans); let mut bytes = working_set.get_span_contents(full_span); @@ -3145,7 +3145,7 @@ pub fn parse_full_signature(working_set: &mut StateWorkingSet, spans: &[Span]) - } = &mut arg_signature { sig.input_output_types = input_output_types; - expr_span.end = span(&spans[1..]).end; + expr_span.end = Span::concat(&spans[1..]).end; } arg_signature } else { @@ -3154,9 +3154,9 @@ pub fn parse_full_signature(working_set: &mut StateWorkingSet, spans: &[Span]) - } pub fn parse_row_condition(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression { - let var_id = working_set.add_variable(b"$it".to_vec(), span(spans), Type::Any, false); + let var_id = working_set.add_variable(b"$it".to_vec(), Span::concat(spans), Type::Any, false); let expression = parse_math_expression(working_set, spans, Some(var_id)); - let span = span(spans); + let span = Span::concat(spans); let block_id = match expression.expr { Expr::Block(block_id) => block_id, @@ -5060,7 +5060,7 @@ pub fn parse_math_expression( working_set.error(err); } - let op_span = span(&[lhs.span, rhs.span]); + let op_span = Span::append(lhs.span, rhs.span); expr_stack.push(Expression { expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)), span: op_span, @@ -5096,7 +5096,7 @@ pub fn parse_math_expression( working_set.error(err) } - let binary_op_span = span(&[lhs.span, rhs.span]); + let binary_op_span = Span::append(lhs.span, rhs.span); expr_stack.push(Expression { expr: Expr::BinaryOp(Box::new(lhs), Box::new(op), Box::new(rhs)), span: binary_op_span, @@ -5167,7 +5167,7 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex if pos == spans.len() { working_set.error(ParseError::UnknownCommand(spans[0])); - return garbage(span(spans)); + return garbage(Span::concat(spans)); } let output = if is_math_expression_like(working_set, spans[pos]) { @@ -5262,13 +5262,13 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex let arguments = vec![ Argument::Positional(Expression { expr: Expr::Record(env_vars), - span: span(&spans[..pos]), + span: Span::concat(&spans[..pos]), ty: Type::Any, custom_completion: None, }), Argument::Positional(Expression { expr: Expr::Closure(block_id), - span: span(&spans[pos..]), + span: Span::concat(&spans[pos..]), ty: Type::Closure, custom_completion: None, }), @@ -5284,7 +5284,7 @@ pub fn parse_expression(working_set: &mut StateWorkingSet, spans: &[Span]) -> Ex Expression { expr, custom_completion: None, - span: span(spans), + span: Span::concat(spans), ty, } } else { @@ -5636,7 +5636,7 @@ pub fn parse_pipeline( // if the 'let' is complete enough, use it, if 
not, fall through for now if new_command.parts.len() > 3 { - let rhs_span = nu_protocol::span(&new_command.parts[3..]); + let rhs_span = Span::concat(&new_command.parts[3..]); new_command.parts.truncate(3); new_command.parts.push(rhs_span); diff --git a/crates/nu-protocol/src/ast/import_pattern.rs b/crates/nu-protocol/src/ast/import_pattern.rs index 2af08087a9..893dd9897b 100644 --- a/crates/nu-protocol/src/ast/import_pattern.rs +++ b/crates/nu-protocol/src/ast/import_pattern.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::{span, ModuleId, Span, VarId}; +use crate::{ModuleId, Span, VarId}; use std::collections::HashSet; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -12,17 +12,22 @@ pub enum ImportPatternMember { impl ImportPatternMember { pub fn span(&self) -> Span { - let mut spans = vec![]; match self { - ImportPatternMember::Glob { span } => spans.push(*span), - ImportPatternMember::Name { name: _, span } => spans.push(*span), + ImportPatternMember::Glob { span } | ImportPatternMember::Name { span, .. } => *span, ImportPatternMember::List { names } => { - for (_, span) in names { - spans.push(*span); - } + let first = names + .first() + .map(|&(_, span)| span) + .unwrap_or(Span::unknown()); + + let last = names + .last() + .map(|&(_, span)| span) + .unwrap_or(Span::unknown()); + + Span::append(first, last) } } - span(&spans) } } @@ -59,13 +64,13 @@ impl ImportPattern { } pub fn span(&self) -> Span { - let mut spans = vec![self.head.span]; - - for member in &self.members { - spans.push(member.span()); - } - - span(&spans) + Span::append( + self.head.span, + self.members + .last() + .map(ImportPatternMember::span) + .unwrap_or(self.head.span), + ) } pub fn with_hidden(self, hidden: HashSet>) -> Self { diff --git a/crates/nu-protocol/src/span.rs b/crates/nu-protocol/src/span.rs index 7bc13997a1..3d32aa4ddf 100644 --- a/crates/nu-protocol/src/span.rs +++ b/crates/nu-protocol/src/span.rs @@ -1,7 +1,6 @@ -use std::ops::Deref; - use miette::SourceSpan; use serde::{Deserialize, Serialize}; +use std::ops::Deref; /// A spanned area of interest, generic over what kind of thing is of interest #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)] @@ -74,77 +73,123 @@ impl IntoSpanned for T { /// Spans are a global offset across all seen files, which are cached in the engine's state. The start and /// end offset together make the inclusive start/exclusive end pair for where to underline to highlight /// a given point of interest. -#[non_exhaustive] #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] pub struct Span { pub start: usize, pub end: usize, } -impl From for SourceSpan { - fn from(s: Span) -> Self { - Self::new(s.start.into(), s.end - s.start) - } -} - impl Span { - pub fn new(start: usize, end: usize) -> Span { + pub fn new(start: usize, end: usize) -> Self { debug_assert!( end >= start, "Can't create a Span whose end < start, start={start}, end={end}" ); - Span { start, end } + Self { start, end } } - pub const fn unknown() -> Span { - Span { start: 0, end: 0 } + pub const fn unknown() -> Self { + Self { start: 0, end: 0 } } /// Note: Only use this for test data, *not* live data, as it will point into unknown source /// when used in errors. 
- pub const fn test_data() -> Span { + pub const fn test_data() -> Self { Self::unknown() } - pub fn offset(&self, offset: usize) -> Span { - Span::new(self.start - offset, self.end - offset) + pub fn offset(&self, offset: usize) -> Self { + Self::new(self.start - offset, self.end - offset) } pub fn contains(&self, pos: usize) -> bool { - pos >= self.start && pos < self.end + self.start <= pos && pos < self.end } - pub fn contains_span(&self, span: Span) -> bool { - span.start >= self.start && span.end <= self.end + pub fn contains_span(&self, span: Self) -> bool { + self.start <= span.start && span.end <= self.end } - /// Point to the space just past this span, useful for missing - /// values - pub fn past(&self) -> Span { - Span { + /// Point to the space just past this span, useful for missing values + pub fn past(&self) -> Self { + Self { start: self.end, end: self.end, } } + + /// Returns the minimal [`Span`] that encompasses both of the given spans. + /// + /// The two `Spans` can overlap in the middle, + /// but must otherwise be in order by satisfying: + /// - `self.start <= after.start` + /// - `self.end <= after.end` + /// + /// If this is not guaranteed to be the case, use [`Span::merge`] instead. + pub fn append(self, after: Self) -> Self { + debug_assert!( + self.start <= after.start && self.end <= after.end, + "Can't merge two Spans that are not in order" + ); + Self { + start: self.start, + end: after.end, + } + } + + /// Returns the minimal [`Span`] that encompasses both of the given spans. + /// + /// The spans need not be in order or have any relationship. + /// + /// [`Span::append`] is slightly more efficient if the spans are known to be in order. + pub fn merge(self, other: Self) -> Self { + Self { + start: usize::min(self.start, other.start), + end: usize::max(self.end, other.end), + } + } + + /// Returns the minimal [`Span`] that encompasses all of the spans in the given slice. + /// + /// The spans are assumed to be in order, that is, all consecutive spans must satisfy: + /// - `spans[i].start <= spans[i + 1].start` + /// - `spans[i].end <= spans[i + 1].end` + /// + /// (Two consecutive spans can overlap as long as the above is true.) + /// + /// Use [`Span::merge_many`] if the spans are not known to be in order. + pub fn concat(spans: &[Self]) -> Self { + // TODO: enable assert below + // debug_assert!(!spans.is_empty()); + debug_assert!(spans.windows(2).all(|spans| { + let &[a, b] = spans else { + return false; + }; + a.start <= b.start && a.end <= b.end + })); + Self { + start: spans.first().map(|s| s.start).unwrap_or(0), + end: spans.last().map(|s| s.end).unwrap_or(0), + } + } + + /// Returns the minimal [`Span`] that encompasses all of the spans in the given iterator. + /// + /// The spans need not be in order or have any relationship. + /// + /// [`Span::concat`] is more efficient if the spans are known to be in order. + pub fn merge_many(spans: impl IntoIterator) -> Self { + spans + .into_iter() + .reduce(Self::merge) + .unwrap_or(Self::unknown()) + } } -/// Used when you have a slice of spans of at least size 1 -pub fn span(spans: &[Span]) -> Span { - let length = spans.len(); - - //TODO debug_assert!(length > 0, "expect spans > 0"); - if length == 0 { - Span::unknown() - } else if length == 1 { - spans[0] - } else { - let end = spans - .iter() - .map(|s| s.end) - .max() - .expect("Must be an end. 
Length > 0"); - Span::new(spans[0].start, end) +impl From for SourceSpan { + fn from(s: Span) -> Self { + Self::new(s.start.into(), s.end - s.start) } } diff --git a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs index df0854ffee..a47197bde8 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/nu_dataframe/between_values.rs @@ -1,7 +1,7 @@ use super::{operations::Axis, NuDataFrame}; use nu_protocol::{ ast::{Boolean, Comparison, Math, Operator}, - span, ShellError, Span, Spanned, Value, + ShellError, Span, Spanned, Value, }; use num::Zero; use polars::prelude::{ @@ -17,9 +17,10 @@ pub(super) fn between_dataframes( right: &Value, rhs: &NuDataFrame, ) -> Result { - let operation_span = span(&[left.span(), right.span()]); match operator.item { - Operator::Math(Math::Plus) => lhs.append_df(rhs, Axis::Row, operation_span), + Operator::Math(Math::Plus) => { + lhs.append_df(rhs, Axis::Row, Span::merge(left.span(), right.span())) + } _ => Err(ShellError::OperatorMismatch { op_span: operator.span, lhs_ty: left.get_type().to_string(), @@ -37,7 +38,7 @@ pub(super) fn compute_between_series( right: &Value, rhs: &Series, ) -> Result { - let operation_span = span(&[left.span(), right.span()]); + let operation_span = Span::merge(left.span(), right.span()); match operator.item { Operator::Math(Math::Plus) => { let mut res = lhs + rhs; diff --git a/crates/nu_plugin_polars/src/dataframe/values/utils.rs b/crates/nu_plugin_polars/src/dataframe/values/utils.rs index f77870114b..88ce8a4656 100644 --- a/crates/nu_plugin_polars/src/dataframe/values/utils.rs +++ b/crates/nu_plugin_polars/src/dataframe/values/utils.rs @@ -1,4 +1,4 @@ -use nu_protocol::{span as span_join, ShellError, Span, Spanned, Value}; +use nu_protocol::{ShellError, Span, Spanned, Value}; // Default value used when selecting rows from dataframe pub const DEFAULT_ROWS: usize = 5; @@ -20,8 +20,8 @@ pub(crate) fn convert_columns( span: Some(span), help: None, inner: vec![], - }) - .map(|v| v.span())?; + })? + .span(); let res = columns .into_iter() @@ -29,7 +29,7 @@ pub(crate) fn convert_columns( let span = value.span(); match value { Value::String { val, .. } => { - col_span = span_join(&[col_span, span]); + col_span = col_span.merge(span); Ok(Spanned { item: val, span }) } _ => Err(ShellError::GenericError { @@ -70,7 +70,7 @@ pub(crate) fn convert_columns_string( let span = value.span(); match value { Value::String { val, .. } => { - col_span = span_join(&[col_span, span]); + col_span = col_span.merge(span); Ok(val) } _ => Err(ShellError::GenericError { From 6891267b5347f42f1fc53d1f737928ade2449660 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Thu, 16 May 2024 23:59:08 +0000 Subject: [PATCH 045/137] Support `ByteStream`s in `bytes starts-with` and `bytes ends-with` (#12887) # Description Restores `bytes starts-with` so that it is able to work with byte streams once again. For parity/consistency, this PR also adds byte stream support to `bytes ends-with`. # User-Facing Changes - `bytes ends-with` now supports byte streams. # Tests + Formatting Re-enabled tests for `bytes starts-with` and added tests for `bytes ends-with`. 
--- .../tests/commands/bytes/ends_with.rs | 120 +++++++++++++ .../nu-cmd-extra/tests/commands/bytes/mod.rs | 1 + .../tests/commands/bytes/starts_with.rs | 160 +++++++++--------- crates/nu-command/src/bytes/ends_with.rs | 54 +++++- crates/nu-command/src/bytes/starts_with.rs | 36 ++-- 5 files changed, 275 insertions(+), 96 deletions(-) create mode 100644 crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs diff --git a/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs new file mode 100644 index 0000000000..b90f936b96 --- /dev/null +++ b/crates/nu-cmd-extra/tests/commands/bytes/ends_with.rs @@ -0,0 +1,120 @@ +use nu_test_support::nu; + +#[test] +fn basic_binary_end_with() { + let actual = nu!(r#" + "hello world" | into binary | bytes ends-with 0x[77 6f 72 6c 64] + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn basic_string_fails() { + let actual = nu!(r#" + "hello world" | bytes ends-with 0x[77 6f 72 6c 64] + "#); + + assert!(actual.err.contains("command doesn't support")); + assert_eq!(actual.out, ""); +} + +#[test] +fn short_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101] + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn short_stream_mismatch() { + let actual = nu!(r#" + nu --testbin repeater (0x[010203]) 5 | bytes ends-with 0x[010204] + "#); + + assert_eq!(actual.out, "false"); +} + +#[test] +fn short_stream_binary_overflow() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes ends-with 0x[010101010101] + "#); + + assert_eq!(actual.out, "false"); +} + +#[test] +fn long_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes ends-with 0x[010101] + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_binary_overflow() { + // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes ends-with (0..32768 | each {|| 0x[01] } | bytes collect) + "#); + + assert_eq!(actual.out, "false"); +} + +#[test] +fn long_stream_binary_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01020304]) 8192 | bytes ends-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_string_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater hell 8192 | bytes ends-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) + "#); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_mixed_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) + + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build $binseg $strseg) + "#); + + assert_eq!( + actual.err, "", + "invocation failed. 
command line limit likely reached" + ); + assert_eq!(actual.out, "true"); +} + +#[test] +fn long_stream_mixed_overflow() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) + + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes ends-with (bytes build 0x[01] $binseg $strseg) + "#); + + assert_eq!( + actual.err, "", + "invocation failed. command line limit likely reached" + ); + assert_eq!(actual.out, "false"); +} diff --git a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs index b5517bdacb..a8a241eec0 100644 --- a/crates/nu-cmd-extra/tests/commands/bytes/mod.rs +++ b/crates/nu-cmd-extra/tests/commands/bytes/mod.rs @@ -1 +1,2 @@ +mod ends_with; mod starts_with; diff --git a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs index c3ad1ec448..e7d57698b5 100644 --- a/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs +++ b/crates/nu-cmd-extra/tests/commands/bytes/starts_with.rs @@ -19,102 +19,102 @@ fn basic_string_fails() { assert_eq!(actual.out, ""); } -// #[test] -// fn short_stream_binary() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101] -// "#); +#[test] +fn short_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101] + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn short_stream_mismatch() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204] -// "#); +#[test] +fn short_stream_mismatch() { + let actual = nu!(r#" + nu --testbin repeater (0x[010203]) 5 | bytes starts-with 0x[010204] + "#); -// assert_eq!(actual.out, "false"); -// } + assert_eq!(actual.out, "false"); +} -// #[test] -// fn short_stream_binary_overflow() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101] -// "#); +#[test] +fn short_stream_binary_overflow() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 5 | bytes starts-with 0x[010101010101] + "#); -// assert_eq!(actual.out, "false"); -// } + assert_eq!(actual.out, "false"); +} -// #[test] -// fn long_stream_binary() { -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101] -// "#); +#[test] +fn long_stream_binary() { + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes starts-with 0x[010101] + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_binary_overflow() { -// // .. ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect) -// "#); +#[test] +fn long_stream_binary_overflow() { + // .. 
ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01]) 32768 | bytes starts-with (0..32768 | each {|| 0x[01] } | bytes collect) + "#); -// assert_eq!(actual.out, "false"); -// } + assert_eq!(actual.out, "false"); +} -// #[test] -// fn long_stream_binary_exact() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) -// "#); +#[test] +fn long_stream_binary_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater (0x[01020304]) 8192 | bytes starts-with (0..<8192 | each {|| 0x[01020304] } | bytes collect) + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_string_exact() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) -// "#); +#[test] +fn long_stream_string_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + nu --testbin repeater hell 8192 | bytes starts-with (0..<8192 | each {|| "hell" | into binary } | bytes collect) + "#); -// assert_eq!(actual.out, "true"); -// } + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_mixed_exact() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) -// let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) +#[test] +fn long_stream_mixed_exact() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) -// nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg) -// "#); + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg) + "#); -// assert_eq!( -// actual.err, "", -// "invocation failed. command line limit likely reached" -// ); -// assert_eq!(actual.out, "true"); -// } + assert_eq!( + actual.err, "", + "invocation failed. 
command line limit likely reached" + ); + assert_eq!(actual.out, "true"); +} -// #[test] -// fn long_stream_mixed_overflow() { -// // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow -// let actual = nu!(r#" -// let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) -// let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) +#[test] +fn long_stream_mixed_overflow() { + // ranges are inclusive..inclusive, so we don't need to +1 to check for an overflow + let actual = nu!(r#" + let binseg = (0..<2048 | each {|| 0x[003d9fbf] } | bytes collect) + let strseg = (0..<2048 | each {|| "hell" | into binary } | bytes collect) -// nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01]) -// "#); + nu --testbin repeat_bytes 003d9fbf 2048 68656c6c 2048 | bytes starts-with (bytes build $binseg $strseg 0x[01]) + "#); -// assert_eq!( -// actual.err, "", -// "invocation failed. command line limit likely reached" -// ); -// assert_eq!(actual.out, "false"); -// } + assert_eq!( + actual.err, "", + "invocation failed. command line limit likely reached" + ); + assert_eq!(actual.out, "false"); +} diff --git a/crates/nu-command/src/bytes/ends_with.rs b/crates/nu-command/src/bytes/ends_with.rs index ef0389db0c..d6174a189c 100644 --- a/crates/nu-command/src/bytes/ends_with.rs +++ b/crates/nu-command/src/bytes/ends_with.rs @@ -1,5 +1,9 @@ use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; +use std::{ + collections::VecDeque, + io::{self, BufRead}, +}; struct Arguments { pattern: Vec, @@ -52,14 +56,54 @@ impl Command for BytesEndsWith { call: &Call, input: PipelineData, ) -> Result { + let head = call.head; let pattern: Vec = call.req(engine_state, stack, 0)?; let cell_paths: Vec = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let arg = Arguments { - pattern, - cell_paths, - }; - operate(ends_with, arg, input, call.head, engine_state.ctrlc.clone()) + + if let PipelineData::ByteStream(stream, ..) = input { + let span = stream.span(); + if pattern.is_empty() { + return Ok(Value::bool(true, head).into_pipeline_data()); + } + let Some(mut reader) = stream.reader() else { + return Ok(Value::bool(false, head).into_pipeline_data()); + }; + let cap = pattern.len(); + let mut end = VecDeque::::with_capacity(cap); + loop { + let buf = match reader.fill_buf() { + Ok(&[]) => break, + Ok(buf) => buf, + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e.into_spanned(span).into()), + }; + let len = buf.len(); + if len >= cap { + end.clear(); + end.extend(&buf[(len - cap)..]) + } else { + let new_len = len + end.len(); + if new_len > cap { + // The `drain` below will panic if `(new_len - cap) > end.len()`. 
+ // But this cannot happen since we know `len < cap` (as checked above): + // (len + end.len() - cap) > end.len() + // => (len - cap) > 0 + // => len > cap + end.drain(..(new_len - cap)); + } + end.extend(buf); + } + reader.consume(len); + } + Ok(Value::bool(end == pattern, head).into_pipeline_data()) + } else { + let arg = Arguments { + pattern, + cell_paths, + }; + operate(ends_with, arg, input, head, engine_state.ctrlc.clone()) + } } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/bytes/starts_with.rs b/crates/nu-command/src/bytes/starts_with.rs index 2d7ca3e26a..92cc16f02c 100644 --- a/crates/nu-command/src/bytes/starts_with.rs +++ b/crates/nu-command/src/bytes/starts_with.rs @@ -1,5 +1,6 @@ use nu_cmd_base::input_handler::{operate, CmdArgument}; use nu_engine::command_prelude::*; +use std::io::Read; struct Arguments { pattern: Vec, @@ -53,20 +54,33 @@ impl Command for BytesStartsWith { call: &Call, input: PipelineData, ) -> Result { + let head = call.head; let pattern: Vec = call.req(engine_state, stack, 0)?; let cell_paths: Vec = call.rest(engine_state, stack, 1)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - let arg = Arguments { - pattern, - cell_paths, - }; - operate( - starts_with, - arg, - input, - call.head, - engine_state.ctrlc.clone(), - ) + + if let PipelineData::ByteStream(stream, ..) = input { + let span = stream.span(); + if pattern.is_empty() { + return Ok(Value::bool(true, head).into_pipeline_data()); + } + let Some(reader) = stream.reader() else { + return Ok(Value::bool(false, head).into_pipeline_data()); + }; + let mut start = Vec::with_capacity(pattern.len()); + reader + .take(pattern.len() as u64) + .read_to_end(&mut start) + .err_span(span)?; + + Ok(Value::bool(start == pattern, head).into_pipeline_data()) + } else { + let arg = Arguments { + pattern, + cell_paths, + }; + operate(starts_with, arg, input, head, engine_state.ctrlc.clone()) + } } fn examples(&self) -> Vec { From 8adf3406e5e6c19f86c82c1f51db93d47217d8ef Mon Sep 17 00:00:00 2001 From: Wind Date: Fri, 17 May 2024 08:03:13 +0800 Subject: [PATCH 046/137] allow define it as a variable inside closure (#12888) # Description Fixes: #12690 The issue is happened after https://github.com/nushell/nushell/pull/12056 is merged. It will raise error if user doesn't supply required parameter when run closure with do. And parser adds a `$it` parameter when parsing closure or block expression. I believe the previous behavior is because we allow such syntax on previous version(0.44): ```nushell let x = { print $it } ``` But it's no longer allowed after 0.60. So I think they can be removed. # User-Facing Changes ```nushell let tmp = { let it = 42 print $it } do -c $tmp ``` should be possible again. 
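For contrast, a closure that genuinely wants a parameter can still declare one explicitly and pass it through `do` (a small illustrative snippet, not taken from this PR):

```nushell
# an explicit parameter, unlike the removed implicit `$it`, is supplied at the call site
do {|it| print $it } 42
```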
# Tests + Formatting Added 1 test --- crates/nu-command/tests/commands/do_.rs | 7 +++++++ crates/nu-parser/src/parser.rs | 28 ------------------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/crates/nu-command/tests/commands/do_.rs b/crates/nu-command/tests/commands/do_.rs index 6a71a0f025..5f46b02c17 100644 --- a/crates/nu-command/tests/commands/do_.rs +++ b/crates/nu-command/tests/commands/do_.rs @@ -66,3 +66,10 @@ fn ignore_error_works_with_list_stream() { let actual = nu!(r#"do -i { ["a", null, "b"] | ansi strip }"#); assert!(actual.err.is_empty()); } + +#[test] +fn run_closure_with_it_using() { + let actual = nu!(r#"let x = {let it = 3; $it}; do $x"#); + assert!(actual.err.is_empty()); + assert_eq!(actual.out, "3"); +} diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index bd09f5b52a..fb36e8b503 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -4177,20 +4177,6 @@ pub fn parse_block_expression(working_set: &mut StateWorkingSet, span: Span) -> if let Some(signature) = signature { output.signature = signature.0; - } else if let Some(last) = working_set.delta.scope.last() { - // FIXME: this only supports the top $it. Is this sufficient? - - if let Some(var_id) = last.get_var(b"$it") { - let mut signature = Signature::new(""); - signature.required_positional.push(PositionalArg { - var_id: Some(*var_id), - name: "$it".into(), - desc: String::new(), - shape: SyntaxShape::Any, - default_value: None, - }); - output.signature = Box::new(signature); - } } output.span = Some(span); @@ -4518,20 +4504,6 @@ pub fn parse_closure_expression( if let Some(signature) = signature { output.signature = signature.0; - } else if let Some(last) = working_set.delta.scope.last() { - // FIXME: this only supports the top $it. Is this sufficient? - - if let Some(var_id) = last.get_var(b"$it") { - let mut signature = Signature::new(""); - signature.required_positional.push(PositionalArg { - var_id: Some(*var_id), - name: "$it".into(), - desc: String::new(), - shape: SyntaxShape::Any, - default_value: None, - }); - output.signature = Box::new(signature); - } } output.span = Some(span); From 59f7c523fac538a433ee80ebdbd8a1f410590082 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Fri, 17 May 2024 07:18:18 -0700 Subject: [PATCH 047/137] Fix the way the output of `table` is printed in `print()` (#12895) # Description Forgot that I fixed this already on my branch, but when printing without a display output hook, the implicit call to `table` gets its output mangled with newlines (since #12774). This happens when running `nu -c` or a script file. Here's that fix in one PR so it can be merged easily. 
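For context, the symptom only appears when no `display_output` hook is set and the implicit `table` call does the printing, for example in a non-interactive invocation roughly like the following (hypothetical reproduction, not part of the patch):

```nushell
# Non-interactive run: the result is printed via PipelineData::print and the
# implicit `table` fallback that this patch fixes.
nu -c "ls | where type == file | first 3"
```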
# Tests + Formatting - :green_circle: `toolkit fmt` - :green_circle: `toolkit clippy` - :green_circle: `toolkit test` - :green_circle: `toolkit test stdlib` --- .../nu-protocol/src/pipeline/pipeline_data.rs | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/crates/nu-protocol/src/pipeline/pipeline_data.rs b/crates/nu-protocol/src/pipeline/pipeline_data.rs index d7e58e63a3..b2883b1673 100644 --- a/crates/nu-protocol/src/pipeline/pipeline_data.rs +++ b/crates/nu-protocol/src/pipeline/pipeline_data.rs @@ -543,16 +543,15 @@ impl PipelineData { if let Some(decl_id) = engine_state.table_decl_id { let command = engine_state.get_decl(decl_id); if command.get_block_id().is_some() { - self.write_all_and_flush(engine_state, no_newline, to_stderr)?; + self.write_all_and_flush(engine_state, no_newline, to_stderr) } else { let call = Call::new(Span::new(0, 0)); let table = command.run(engine_state, stack, &call, self)?; - table.write_all_and_flush(engine_state, no_newline, to_stderr)?; + table.write_all_and_flush(engine_state, no_newline, to_stderr) } } else { - self.write_all_and_flush(engine_state, no_newline, to_stderr)?; + self.write_all_and_flush(engine_state, no_newline, to_stderr) } - Ok(None) } } @@ -561,27 +560,32 @@ impl PipelineData { engine_state: &EngineState, no_newline: bool, to_stderr: bool, - ) -> Result<(), ShellError> { - let config = engine_state.get_config(); - for item in self { - let mut out = if let Value::Error { error, .. } = item { - return Err(*error); - } else { - item.to_expanded_string("\n", config) - }; + ) -> Result, ShellError> { + if let PipelineData::ByteStream(stream, ..) = self { + // Copy ByteStreams directly + stream.print(to_stderr) + } else { + let config = engine_state.get_config(); + for item in self { + let mut out = if let Value::Error { error, .. } = item { + return Err(*error); + } else { + item.to_expanded_string("\n", config) + }; - if !no_newline { - out.push('\n'); + if !no_newline { + out.push('\n'); + } + + if to_stderr { + stderr_write_all_and_flush(out)? + } else { + stdout_write_all_and_flush(out)? + } } - if to_stderr { - stderr_write_all_and_flush(out)? - } else { - stdout_write_all_and_flush(out)? - } + Ok(None) } - - Ok(()) } } From e3db6ea04ae085dbbe6f1aea984b23069fbb2a46 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Fri, 17 May 2024 08:04:59 -0700 Subject: [PATCH 048/137] Exclude polars from ensure_plugins_built(), for performance reasons (#12896) # Description We have been building `nu_plugin_polars` unnecessarily during `cargo test`, which is very slow. All of its tests are run within its own crate, which happens during the plugins CI phase. This should speed up the CI a bit. 
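For reference, the helper now effectively issues a cargo invocation along these lines, building the plugin binaries workspace-wide while skipping `nu` itself and the heavy `nu_plugin_polars` crate (a rough equivalent of the arguments in the diff below):

```nushell
# Rough equivalent of the build command issued by ensure_plugins_built() after this change:
cargo build --workspace --bins --exclude nu --exclude nu_plugin_polars --quiet
```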
--- crates/nu-test-support/src/commands.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/nu-test-support/src/commands.rs b/crates/nu-test-support/src/commands.rs index dca43c3c14..6939c9bc11 100644 --- a/crates/nu-test-support/src/commands.rs +++ b/crates/nu-test-support/src/commands.rs @@ -21,7 +21,18 @@ pub fn ensure_plugins_built() { } let cargo_path = env!("CARGO"); - let mut arguments = vec!["build", "--package", "nu_plugin_*", "--quiet"]; + let mut arguments = vec![ + "build", + "--workspace", + "--bins", + // Don't build nu, so that we only build the plugins + "--exclude", + "nu", + // Exclude nu_plugin_polars, because it's not needed at this stage, and is a large build + "--exclude", + "nu_plugin_polars", + "--quiet", + ]; let profile = std::env::var("NUSHELL_CARGO_PROFILE"); if let Ok(profile) = &profile { From c10aa2cf09a50fc82127c6a9cbe685075688ad1f Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Fri, 17 May 2024 09:46:03 -0700 Subject: [PATCH 049/137] `collect`: don't require a closure (#12788) # Description This changes the `collect` command so that it doesn't require a closure. Still allowed, optionally. Before: ```nushell open foo.json | insert foo bar | collect { save -f foo.json } ``` After: ```nushell open foo.json | insert foo bar | collect | save -f foo.json ``` The closure argument isn't really necessary, as collect values are also supported as `PipelineData`. # User-Facing Changes - `collect` command changed # Tests + Formatting Example changed to reflect. # After Submitting - [ ] release notes - [ ] we may want to deprecate the closure arg? --- .../nu-cmd-lang/src/core_commands/collect.rs | 94 +++++++++++-------- crates/nu-command/src/filesystem/save.rs | 14 ++- crates/nu-command/tests/commands/save.rs | 41 +++++++- 3 files changed, 106 insertions(+), 43 deletions(-) diff --git a/crates/nu-cmd-lang/src/core_commands/collect.rs b/crates/nu-cmd-lang/src/core_commands/collect.rs index 404aa568da..1c28646548 100644 --- a/crates/nu-cmd-lang/src/core_commands/collect.rs +++ b/crates/nu-cmd-lang/src/core_commands/collect.rs @@ -1,5 +1,5 @@ use nu_engine::{command_prelude::*, get_eval_block, redirect_env}; -use nu_protocol::engine::Closure; +use nu_protocol::{engine::Closure, DataSource, PipelineMetadata}; #[derive(Clone)] pub struct Collect; @@ -12,7 +12,7 @@ impl Command for Collect { fn signature(&self) -> Signature { Signature::build("collect") .input_output_types(vec![(Type::Any, Type::Any)]) - .required( + .optional( "closure", SyntaxShape::Closure(Some(vec![SyntaxShape::Any])), "The closure to run once the stream is collected.", @@ -26,7 +26,14 @@ impl Command for Collect { } fn usage(&self) -> &str { - "Collect a stream into a value and then run a closure with the collected value as input." + "Collect a stream into a value." + } + + fn extra_usage(&self) -> &str { + r#"If provided, run a closure with the collected value as input. 
+ +The entire stream will be collected into one value in memory, so if the stream +is particularly large, this can cause high memory usage."# } fn run( @@ -36,46 +43,59 @@ impl Command for Collect { call: &Call, input: PipelineData, ) -> Result { - let closure: Closure = call.req(engine_state, stack, 0)?; + let closure: Option = call.opt(engine_state, stack, 0)?; - let block = engine_state.get_block(closure.block_id); - let mut stack_captures = - stack.captures_to_stack_preserve_out_dest(closure.captures.clone()); + let metadata = match input.metadata() { + // Remove the `FilePath` metadata, because after `collect` it's no longer necessary to + // check where some input came from. + Some(PipelineMetadata { + data_source: DataSource::FilePath(_), + }) => None, + other => other, + }; - let metadata = input.metadata(); let input = input.into_value(call.head)?; + let result; - let mut saved_positional = None; - if let Some(var) = block.signature.get_positional(0) { - if let Some(var_id) = &var.var_id { - stack_captures.add_var(*var_id, input.clone()); - saved_positional = Some(*var_id); + if let Some(closure) = closure { + let block = engine_state.get_block(closure.block_id); + let mut stack_captures = + stack.captures_to_stack_preserve_out_dest(closure.captures.clone()); + + let mut saved_positional = None; + if let Some(var) = block.signature.get_positional(0) { + if let Some(var_id) = &var.var_id { + stack_captures.add_var(*var_id, input.clone()); + saved_positional = Some(*var_id); + } } + + let eval_block = get_eval_block(engine_state); + + result = eval_block( + engine_state, + &mut stack_captures, + block, + input.into_pipeline_data_with_metadata(metadata), + ); + + if call.has_flag(engine_state, stack, "keep-env")? { + redirect_env(engine_state, stack, &stack_captures); + // for when we support `data | let x = $in;` + // remove the variables added earlier + for (var_id, _) in closure.captures { + stack_captures.remove_var(var_id); + } + if let Some(u) = saved_positional { + stack_captures.remove_var(u); + } + // add any new variables to the stack + stack.vars.extend(stack_captures.vars); + } + } else { + result = Ok(input.into_pipeline_data_with_metadata(metadata)); } - let eval_block = get_eval_block(engine_state); - - let result = eval_block( - engine_state, - &mut stack_captures, - block, - input.into_pipeline_data(), - ) - .map(|x| x.set_metadata(metadata)); - - if call.has_flag(engine_state, stack, "keep-env")? 
{ - redirect_env(engine_state, stack, &stack_captures); - // for when we support `data | let x = $in;` - // remove the variables added earlier - for (var_id, _) in closure.captures { - stack_captures.remove_var(var_id); - } - if let Some(u) = saved_positional { - stack_captures.remove_var(u); - } - // add any new variables to the stack - stack.vars.extend(stack_captures.vars); - } result } @@ -88,7 +108,7 @@ impl Command for Collect { }, Example { description: "Read and write to the same file", - example: "open file.txt | collect { save -f file.txt }", + example: "open file.txt | collect | save -f file.txt", result: None, }, ] diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index ca9943eafb..7326011d9b 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -245,11 +245,15 @@ impl Command for Save { Ok(PipelineData::empty()) } input => { - check_saving_to_source_file( - input.metadata().as_ref(), - &path, - stderr_path.as_ref(), - )?; + // It's not necessary to check if we are saving to the same file if this is a + // collected value, and not a stream + if !matches!(input, PipelineData::Value(..) | PipelineData::Empty) { + check_saving_to_source_file( + input.metadata().as_ref(), + &path, + stderr_path.as_ref(), + )?; + } let bytes = input_to_bytes(input, Path::new(&path.item), raw, engine_state, stack, span)?; diff --git a/crates/nu-command/tests/commands/save.rs b/crates/nu-command/tests/commands/save.rs index ef0304dc7c..8a2332afdf 100644 --- a/crates/nu-command/tests/commands/save.rs +++ b/crates/nu-command/tests/commands/save.rs @@ -84,7 +84,7 @@ fn save_append_will_not_overwrite_content() { } #[test] -fn save_stderr_and_stdout_to_afame_file() { +fn save_stderr_and_stdout_to_same_file() { Playground::setup("save_test_5", |dirs, sandbox| { sandbox.with_files(&[]); @@ -424,3 +424,42 @@ fn save_with_custom_converter() { assert_eq!(actual, r#"{"a":1,"b":2}"#); }) } + +#[test] +fn save_same_file_with_collect() { + Playground::setup("save_test_20", |dirs, _sandbox| { + let actual = nu!( + cwd: dirs.test(), pipeline(" + echo 'world' + | save hello; + open hello + | prepend 'hello' + | collect + | save --force hello; + open hello + ") + ); + assert!(actual.status.success()); + assert_eq!("helloworld", actual.out); + }) +} + +#[test] +fn save_same_file_with_collect_and_filter() { + Playground::setup("save_test_21", |dirs, _sandbox| { + let actual = nu!( + cwd: dirs.test(), pipeline(" + echo 'world' + | save hello; + open hello + | prepend 'hello' + | collect + | filter { true } + | save --force hello; + open hello + ") + ); + assert!(actual.status.success()); + assert_eq!("helloworld", actual.out); + }) +} From 580c60bb821af25f838edafd8461bb206d3419f3 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Fri, 17 May 2024 17:59:32 +0000 Subject: [PATCH 050/137] Preserve metadata in more places (#12848) # Description This PR makes some commands and areas of code preserve pipeline metadata. This is in an attempt to make the issue described in #12599 and #9456 less likely to occur. That is, reading and writing to the same file in a pipeline will result in an empty file. Since we preserve metadata in more places now, there will be a higher chance that we successfully detect this error case and abort the pipeline. 
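Concretely, the failure mode in those issues is a pipeline that reads and writes the same file in one go. With metadata carried through more commands, `save` is more likely to notice that its target is also the pipeline's source and abort instead of truncating the file. An illustrative sketch (`foo.json` is a placeholder):

```nushell
# Risky round-trip: the file is still being read while `save` overwrites it.
# With metadata preserved, this now has a better chance of being caught and rejected.
open foo.json | insert foo bar | save -f foo.json

# Safe: buffer the stream first, as the error hint suggests.
open foo.json | insert foo bar | collect | save -f foo.json
```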
--- crates/nu-command/src/filesystem/save.rs | 4 +- .../nu-protocol/src/pipeline/pipeline_data.rs | 104 ++++++++++-------- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index 7326011d9b..1be74665b2 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -322,7 +322,9 @@ fn saving_to_source_file_error(dest: &Spanned) -> ShellError { dest.item.display() ), span: Some(dest.span), - help: Some("You should use `collect` to run your save command (see `help collect`). Or, you can put the file data in a variable and then pass the variable to `save`.".into()), + help: Some( + "insert a `collect` command in the pipeline before `save` (see `help collect`).".into(), + ), inner: vec![], } } diff --git a/crates/nu-protocol/src/pipeline/pipeline_data.rs b/crates/nu-protocol/src/pipeline/pipeline_data.rs index b2883b1673..0f4d1eb826 100644 --- a/crates/nu-protocol/src/pipeline/pipeline_data.rs +++ b/crates/nu-protocol/src/pipeline/pipeline_data.rs @@ -291,36 +291,38 @@ impl PipelineData { F: FnMut(Value) -> Value + 'static + Send, { match self { - PipelineData::Value(value, ..) => { + PipelineData::Value(value, metadata) => { let span = value.span(); - match value { + let pipeline = match value { Value::List { vals, .. } => { - Ok(vals.into_iter().map(f).into_pipeline_data(span, ctrlc)) + vals.into_iter().map(f).into_pipeline_data(span, ctrlc) } - Value::Range { val, .. } => Ok(val + Value::Range { val, .. } => val .into_range_iter(span, ctrlc.clone()) .map(f) - .into_pipeline_data(span, ctrlc)), + .into_pipeline_data(span, ctrlc), value => match f(value) { - Value::Error { error, .. } => Err(*error), - v => Ok(v.into_pipeline_data()), + Value::Error { error, .. } => return Err(*error), + v => v.into_pipeline_data(), }, - } + }; + Ok(pipeline.set_metadata(metadata)) } PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::ListStream(stream, ..) => { - Ok(PipelineData::ListStream(stream.map(f), None)) + PipelineData::ListStream(stream, metadata) => { + Ok(PipelineData::ListStream(stream.map(f), metadata)) } - PipelineData::ByteStream(stream, ..) => { + PipelineData::ByteStream(stream, metadata) => { // TODO: is this behavior desired / correct ? let span = stream.span(); - match String::from_utf8(stream.into_bytes()?) { + let value = match String::from_utf8(stream.into_bytes()?) { Ok(mut str) => { str.truncate(str.trim_end_matches(LINE_ENDING_PATTERN).len()); - Ok(f(Value::string(str, span)).into_pipeline_data()) + f(Value::string(str, span)) } - Err(err) => Ok(f(Value::binary(err.into_bytes(), span)).into_pipeline_data()), - } + Err(err) => f(Value::binary(err.into_bytes(), span)), + }; + Ok(value.into_pipeline_data_with_metadata(metadata)) } } } @@ -339,36 +341,37 @@ impl PipelineData { { match self { PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::Value(value, ..) => { + PipelineData::Value(value, metadata) => { let span = value.span(); - match value { + let pipeline = match value { Value::List { vals, .. } => { - Ok(vals.into_iter().flat_map(f).into_pipeline_data(span, ctrlc)) + vals.into_iter().flat_map(f).into_pipeline_data(span, ctrlc) } - Value::Range { val, .. } => Ok(val + Value::Range { val, .. 
} => val .into_range_iter(span, ctrlc.clone()) .flat_map(f) - .into_pipeline_data(span, ctrlc)), - value => Ok(f(value).into_iter().into_pipeline_data(span, ctrlc)), - } + .into_pipeline_data(span, ctrlc), + value => f(value).into_iter().into_pipeline_data(span, ctrlc), + }; + Ok(pipeline.set_metadata(metadata)) } - PipelineData::ListStream(stream, ..) => { - Ok(stream.modify(|iter| iter.flat_map(f)).into()) - } - PipelineData::ByteStream(stream, ..) => { + PipelineData::ListStream(stream, metadata) => Ok(PipelineData::ListStream( + stream.modify(|iter| iter.flat_map(f)), + metadata, + )), + PipelineData::ByteStream(stream, metadata) => { // TODO: is this behavior desired / correct ? let span = stream.span(); - match String::from_utf8(stream.into_bytes()?) { + let iter = match String::from_utf8(stream.into_bytes()?) { Ok(mut str) => { str.truncate(str.trim_end_matches(LINE_ENDING_PATTERN).len()); - Ok(f(Value::string(str, span)) - .into_iter() - .into_pipeline_data(span, ctrlc)) + f(Value::string(str, span)) } - Err(err) => Ok(f(Value::binary(err.into_bytes(), span)) - .into_iter() - .into_pipeline_data(span, ctrlc)), - } + Err(err) => f(Value::binary(err.into_bytes(), span)), + }; + Ok(iter + .into_iter() + .into_pipeline_data_with_metadata(span, ctrlc, metadata)) } } } @@ -384,27 +387,31 @@ impl PipelineData { { match self { PipelineData::Empty => Ok(PipelineData::Empty), - PipelineData::Value(value, ..) => { + PipelineData::Value(value, metadata) => { let span = value.span(); - match value { + let pipeline = match value { Value::List { vals, .. } => { - Ok(vals.into_iter().filter(f).into_pipeline_data(span, ctrlc)) + vals.into_iter().filter(f).into_pipeline_data(span, ctrlc) } - Value::Range { val, .. } => Ok(val + Value::Range { val, .. } => val .into_range_iter(span, ctrlc.clone()) .filter(f) - .into_pipeline_data(span, ctrlc)), + .into_pipeline_data(span, ctrlc), value => { if f(&value) { - Ok(value.into_pipeline_data()) + value.into_pipeline_data() } else { - Ok(Value::nothing(span).into_pipeline_data()) + Value::nothing(span).into_pipeline_data() } } - } + }; + Ok(pipeline.set_metadata(metadata)) } - PipelineData::ListStream(stream, ..) => Ok(stream.modify(|iter| iter.filter(f)).into()), - PipelineData::ByteStream(stream, ..) => { + PipelineData::ListStream(stream, metadata) => Ok(PipelineData::ListStream( + stream.modify(|iter| iter.filter(f)), + metadata, + )), + PipelineData::ByteStream(stream, metadata) => { // TODO: is this behavior desired / correct ? let span = stream.span(); let value = match String::from_utf8(stream.into_bytes()?) { @@ -414,11 +421,12 @@ impl PipelineData { } Err(err) => Value::binary(err.into_bytes(), span), }; - if f(&value) { - Ok(value.into_pipeline_data()) + let value = if f(&value) { + value } else { - Ok(Value::nothing(span).into_pipeline_data()) - } + Value::nothing(span) + }; + Ok(value.into_pipeline_data_with_metadata(metadata)) } } } From cc9f41e553333b1ad0aa4185a912f7a52355a238 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Sat, 18 May 2024 23:37:31 +0000 Subject: [PATCH 051/137] Use `CommandType` in more places (#12832) # Description Kind of a vague title, but this PR does two main things: 1. Rather than overriding functions like `Command::is_parser_keyword`, this PR instead changes commands to override `Command::command_type`. The `CommandType` returned by `Command::command_type` is then used to automatically determine whether `Command::is_parser_keyword` and the other `is_{type}` functions should return true. 
These changes allow us to remove the `CommandType::Other` case and should also guarantee that only one of the `is_{type}` functions on `Command` will return true. 2. Uses the new, reworked `Command::command_type` function in the `scope commands` and `which` commands. # User-Facing Changes - Breaking change for `scope commands`: multiple columns (`is_builtin`, `is_keyword`, `is_plugin`, etc.) have been merged into the `type` column. - Breaking change: the `which` command can now report `plugin` or `keyword` instead of `built-in` in the `type` column. It may also now report `external` instead of `custom` in the `type` column for known `extern`s. --- crates/nu-cli/src/commands/keybindings.rs | 2 +- crates/nu-cli/src/menus/help_completions.rs | 6 +- crates/nu-cmd-dataframe/src/dataframe/stub.rs | 2 +- crates/nu-cmd-extra/src/extra/bits/bits_.rs | 2 +- .../src/extra/filters/roll/roll_.rs | 2 +- .../src/extra/strings/str_/case/str_.rs | 2 +- crates/nu-cmd-lang/src/core_commands/alias.rs | 5 +- .../nu-cmd-lang/src/core_commands/const_.rs | 5 +- crates/nu-cmd-lang/src/core_commands/def.rs | 5 +- .../nu-cmd-lang/src/core_commands/export.rs | 7 +- .../src/core_commands/export_alias.rs | 5 +- .../src/core_commands/export_const.rs | 5 +- .../src/core_commands/export_def.rs | 5 +- .../src/core_commands/export_extern.rs | 5 +- .../src/core_commands/export_module.rs | 5 +- .../src/core_commands/export_use.rs | 5 +- .../nu-cmd-lang/src/core_commands/extern_.rs | 5 +- crates/nu-cmd-lang/src/core_commands/for_.rs | 5 +- crates/nu-cmd-lang/src/core_commands/hide.rs | 5 +- crates/nu-cmd-lang/src/core_commands/let_.rs | 5 +- .../nu-cmd-lang/src/core_commands/module.rs | 5 +- crates/nu-cmd-lang/src/core_commands/mut_.rs | 5 +- .../src/core_commands/overlay/command.rs | 7 +- .../src/core_commands/overlay/hide.rs | 5 +- .../src/core_commands/overlay/new.rs | 5 +- .../src/core_commands/overlay/use_.rs | 6 +- .../nu-cmd-lang/src/core_commands/return_.rs | 5 +- .../src/core_commands/scope/command.rs | 7 +- crates/nu-cmd-lang/src/core_commands/use_.rs | 9 +- .../nu-cmd-plugin/src/commands/plugin/mod.rs | 2 +- .../nu-cmd-plugin/src/commands/plugin/use_.rs | 5 +- crates/nu-cmd-plugin/src/commands/register.rs | 5 +- crates/nu-command/src/bytes/bytes_.rs | 2 +- .../src/conversions/into/command.rs | 2 +- crates/nu-command/src/debug/view.rs | 2 +- crates/nu-command/src/debug/view_source.rs | 2 +- crates/nu-command/src/env/config/config_.rs | 2 +- crates/nu-command/src/filesystem/open.rs | 2 +- crates/nu-command/src/filesystem/save.rs | 2 +- crates/nu-command/src/formats/from/command.rs | 2 +- crates/nu-command/src/formats/to/command.rs | 2 +- crates/nu-command/src/hash/hash_.rs | 2 +- crates/nu-command/src/help/help_commands.rs | 13 +- crates/nu-command/src/help/help_externs.rs | 13 +- crates/nu-command/src/math/math_.rs | 2 +- crates/nu-command/src/misc/source.rs | 5 +- crates/nu-command/src/network/http/http_.rs | 2 +- crates/nu-command/src/network/url/url_.rs | 2 +- crates/nu-command/src/path/path_.rs | 2 +- crates/nu-command/src/random/random_.rs | 2 +- crates/nu-command/src/stor/stor_.rs | 2 +- .../nu-command/src/strings/format/format_.rs | 2 +- .../nu-command/src/strings/split/command.rs | 2 +- .../nu-command/src/strings/str_/case/str_.rs | 2 +- crates/nu-command/src/system/which_.rs | 27 ++-- crates/nu-engine/src/eval.rs | 4 +- crates/nu-engine/src/scope.rs | 7 +- crates/nu-lsp/src/lib.rs | 2 +- crates/nu-parser/src/known_external.rs | 13 +- crates/nu-parser/src/parse_keywords.rs | 2 +-
crates/nu-parser/src/parser.rs | 2 +- crates/nu-plugin-engine/src/declaration.rs | 6 +- crates/nu-protocol/src/alias.rs | 6 +- crates/nu-protocol/src/engine/command.rs | 119 ++++++++---------- crates/nu-protocol/src/engine/engine_state.rs | 18 +-- .../nu-protocol/src/pipeline/pipeline_data.rs | 2 +- crates/nu-protocol/src/signature.rs | 8 +- src/ide.rs | 2 +- 68 files changed, 224 insertions(+), 217 deletions(-) diff --git a/crates/nu-cli/src/commands/keybindings.rs b/crates/nu-cli/src/commands/keybindings.rs index 469c0f96cd..a8c8053a56 100644 --- a/crates/nu-cli/src/commands/keybindings.rs +++ b/crates/nu-cli/src/commands/keybindings.rs @@ -42,7 +42,7 @@ For more information on input and keybindings, check: &Keybindings.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cli/src/menus/help_completions.rs b/crates/nu-cli/src/menus/help_completions.rs index b8bdaad435..62f40b9d8d 100644 --- a/crates/nu-cli/src/menus/help_completions.rs +++ b/crates/nu-cli/src/menus/help_completions.rs @@ -18,7 +18,7 @@ impl NuHelpCompleter { //Vec<(Signature, Vec, bool, bool)> { let mut commands = full_commands .iter() - .filter(|(sig, _, _, _, _)| { + .filter(|(sig, _, _)| { sig.name.to_folded_case().contains(&folded_line) || sig.usage.to_folded_case().contains(&folded_line) || sig @@ -29,7 +29,7 @@ impl NuHelpCompleter { }) .collect::>(); - commands.sort_by(|(a, _, _, _, _), (b, _, _, _, _)| { + commands.sort_by(|(a, _, _), (b, _, _)| { let a_distance = levenshtein_distance(line, &a.name); let b_distance = levenshtein_distance(line, &b.name); a_distance.cmp(&b_distance) @@ -37,7 +37,7 @@ impl NuHelpCompleter { commands .into_iter() - .map(|(sig, examples, _, _, _)| { + .map(|(sig, examples, _)| { let mut long_desc = String::new(); let usage = &sig.usage; diff --git a/crates/nu-cmd-dataframe/src/dataframe/stub.rs b/crates/nu-cmd-dataframe/src/dataframe/stub.rs index 2d8cfde423..58dd2996cd 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/stub.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/stub.rs @@ -35,7 +35,7 @@ impl Command for Dfr { &Dfr.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-extra/src/extra/bits/bits_.rs b/crates/nu-cmd-extra/src/extra/bits/bits_.rs index 6767d3dd83..d795beda4e 100644 --- a/crates/nu-cmd-extra/src/extra/bits/bits_.rs +++ b/crates/nu-cmd-extra/src/extra/bits/bits_.rs @@ -35,7 +35,7 @@ impl Command for Bits { &Bits.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs b/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs index 76e167a575..a1622d71c0 100644 --- a/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs +++ b/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs @@ -39,7 +39,7 @@ impl Command for Roll { &Roll.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs b/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs index cf4537f046..56e0d1164f 100644 --- a/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs +++ b/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs @@ -35,7 +35,7 @@ impl Command for Str { &Str.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-lang/src/core_commands/alias.rs 
b/crates/nu-cmd-lang/src/core_commands/alias.rs index f3603611e4..f14f4d5827 100644 --- a/crates/nu-cmd-lang/src/core_commands/alias.rs +++ b/crates/nu-cmd-lang/src/core_commands/alias.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Alias; @@ -29,8 +30,8 @@ impl Command for Alias { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn search_terms(&self) -> Vec<&str> { diff --git a/crates/nu-cmd-lang/src/core_commands/const_.rs b/crates/nu-cmd-lang/src/core_commands/const_.rs index 4076ae87c9..f780c5ada9 100644 --- a/crates/nu-cmd-lang/src/core_commands/const_.rs +++ b/crates/nu-cmd-lang/src/core_commands/const_.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Const; @@ -30,8 +31,8 @@ impl Command for Const { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn search_terms(&self) -> Vec<&str> { diff --git a/crates/nu-cmd-lang/src/core_commands/def.rs b/crates/nu-cmd-lang/src/core_commands/def.rs index 922ba78abb..eb1124da19 100644 --- a/crates/nu-cmd-lang/src/core_commands/def.rs +++ b/crates/nu-cmd-lang/src/core_commands/def.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Def; @@ -28,8 +29,8 @@ impl Command for Def { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/export.rs b/crates/nu-cmd-lang/src/core_commands/export.rs index e5d3d45683..8634a8c06b 100644 --- a/crates/nu-cmd-lang/src/core_commands/export.rs +++ b/crates/nu-cmd-lang/src/core_commands/export.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_full_help}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportCommand; @@ -23,8 +24,8 @@ impl Command for ExportCommand { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( @@ -40,7 +41,7 @@ impl Command for ExportCommand { &ExportCommand.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-lang/src/core_commands/export_alias.rs b/crates/nu-cmd-lang/src/core_commands/export_alias.rs index 14caddcc7a..4df335da44 100644 --- a/crates/nu-cmd-lang/src/core_commands/export_alias.rs +++ b/crates/nu-cmd-lang/src/core_commands/export_alias.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportAlias; @@ -29,8 +30,8 @@ impl Command for ExportAlias { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn search_terms(&self) -> Vec<&str> { diff --git a/crates/nu-cmd-lang/src/core_commands/export_const.rs b/crates/nu-cmd-lang/src/core_commands/export_const.rs index 988db50b2a..631d85ad89 100644 --- a/crates/nu-cmd-lang/src/core_commands/export_const.rs +++ b/crates/nu-cmd-lang/src/core_commands/export_const.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use 
nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportConst; @@ -30,8 +31,8 @@ impl Command for ExportConst { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/export_def.rs b/crates/nu-cmd-lang/src/core_commands/export_def.rs index 93c5932efb..7a2d3949e1 100644 --- a/crates/nu-cmd-lang/src/core_commands/export_def.rs +++ b/crates/nu-cmd-lang/src/core_commands/export_def.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportDef; @@ -28,8 +29,8 @@ impl Command for ExportDef { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/export_extern.rs b/crates/nu-cmd-lang/src/core_commands/export_extern.rs index 9ca756cf93..1a2ba4e5cb 100644 --- a/crates/nu-cmd-lang/src/core_commands/export_extern.rs +++ b/crates/nu-cmd-lang/src/core_commands/export_extern.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportExtern; @@ -25,8 +26,8 @@ impl Command for ExportExtern { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/export_module.rs b/crates/nu-cmd-lang/src/core_commands/export_module.rs index fdbd143fb0..53a6ca750c 100644 --- a/crates/nu-cmd-lang/src/core_commands/export_module.rs +++ b/crates/nu-cmd-lang/src/core_commands/export_module.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportModule; @@ -30,8 +31,8 @@ impl Command for ExportModule { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/export_use.rs b/crates/nu-cmd-lang/src/core_commands/export_use.rs index 2e4fd3f3e9..5ca96a899e 100644 --- a/crates/nu-cmd-lang/src/core_commands/export_use.rs +++ b/crates/nu-cmd-lang/src/core_commands/export_use.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct ExportUse; @@ -29,8 +30,8 @@ impl Command for ExportUse { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/extern_.rs b/crates/nu-cmd-lang/src/core_commands/extern_.rs index 71400dbb7c..496f104650 100644 --- a/crates/nu-cmd-lang/src/core_commands/extern_.rs +++ b/crates/nu-cmd-lang/src/core_commands/extern_.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Extern; @@ -25,8 +26,8 @@ impl Command for Extern { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/for_.rs b/crates/nu-cmd-lang/src/core_commands/for_.rs index 6f9391614e..387af45282 100644 --- 
a/crates/nu-cmd-lang/src/core_commands/for_.rs +++ b/crates/nu-cmd-lang/src/core_commands/for_.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_eval_block, get_eval_expression}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct For; @@ -41,8 +42,8 @@ impl Command for For { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/hide.rs b/crates/nu-cmd-lang/src/core_commands/hide.rs index 2cfafa6c02..d52d2131c9 100644 --- a/crates/nu-cmd-lang/src/core_commands/hide.rs +++ b/crates/nu-cmd-lang/src/core_commands/hide.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Hide; @@ -31,8 +32,8 @@ This command is a parser keyword. For details, check: https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/let_.rs b/crates/nu-cmd-lang/src/core_commands/let_.rs index cc5504d8d6..f2da628c31 100644 --- a/crates/nu-cmd-lang/src/core_commands/let_.rs +++ b/crates/nu-cmd-lang/src/core_commands/let_.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_eval_block}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Let; @@ -30,8 +31,8 @@ impl Command for Let { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn search_terms(&self) -> Vec<&str> { diff --git a/crates/nu-cmd-lang/src/core_commands/module.rs b/crates/nu-cmd-lang/src/core_commands/module.rs index 45641649ff..908c0764e5 100644 --- a/crates/nu-cmd-lang/src/core_commands/module.rs +++ b/crates/nu-cmd-lang/src/core_commands/module.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Module; @@ -30,8 +31,8 @@ impl Command for Module { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/mut_.rs b/crates/nu-cmd-lang/src/core_commands/mut_.rs index 60c4c146db..5db3c929af 100644 --- a/crates/nu-cmd-lang/src/core_commands/mut_.rs +++ b/crates/nu-cmd-lang/src/core_commands/mut_.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_eval_block}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Mut; @@ -30,8 +31,8 @@ impl Command for Mut { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn search_terms(&self) -> Vec<&str> { diff --git a/crates/nu-cmd-lang/src/core_commands/overlay/command.rs b/crates/nu-cmd-lang/src/core_commands/overlay/command.rs index db502c0932..72cc28e77c 100644 --- a/crates/nu-cmd-lang/src/core_commands/overlay/command.rs +++ b/crates/nu-cmd-lang/src/core_commands/overlay/command.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_full_help}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Overlay; @@ -25,8 +26,8 @@ impl Command for Overlay { You must use one of the following subcommands. 
Using this command as-is will only produce this help message."# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( @@ -42,7 +43,7 @@ impl Command for Overlay { &[], engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-lang/src/core_commands/overlay/hide.rs b/crates/nu-cmd-lang/src/core_commands/overlay/hide.rs index c1b4a653bc..7ea84a2a91 100644 --- a/crates/nu-cmd-lang/src/core_commands/overlay/hide.rs +++ b/crates/nu-cmd-lang/src/core_commands/overlay/hide.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct OverlayHide; @@ -35,8 +36,8 @@ impl Command for OverlayHide { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/overlay/new.rs b/crates/nu-cmd-lang/src/core_commands/overlay/new.rs index 8f9a0e53ea..a571c37947 100644 --- a/crates/nu-cmd-lang/src/core_commands/overlay/new.rs +++ b/crates/nu-cmd-lang/src/core_commands/overlay/new.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct OverlayNew; @@ -33,8 +34,8 @@ This command is a parser keyword. For details, check: https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs b/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs index 13c3f711ad..e8b51fb59b 100644 --- a/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs +++ b/crates/nu-cmd-lang/src/core_commands/overlay/use_.rs @@ -2,7 +2,7 @@ use nu_engine::{ command_prelude::*, find_in_dirs_env, get_dirs_var_from_call, get_eval_block, redirect_env, }; use nu_parser::trim_quotes_str; -use nu_protocol::ast::Expr; +use nu_protocol::{ast::Expr, engine::CommandType}; use std::path::Path; @@ -50,8 +50,8 @@ impl Command for OverlayUse { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/return_.rs b/crates/nu-cmd-lang/src/core_commands/return_.rs index 969456d005..478224079b 100644 --- a/crates/nu-cmd-lang/src/core_commands/return_.rs +++ b/crates/nu-cmd-lang/src/core_commands/return_.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Return; @@ -28,8 +29,8 @@ impl Command for Return { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-lang/src/core_commands/scope/command.rs b/crates/nu-cmd-lang/src/core_commands/scope/command.rs index da507f3159..72a9e74932 100644 --- a/crates/nu-cmd-lang/src/core_commands/scope/command.rs +++ b/crates/nu-cmd-lang/src/core_commands/scope/command.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_full_help}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Scope; @@ -19,8 +20,8 @@ impl Command for Scope { "Commands for getting info about what is in scope." 
} - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( @@ -36,7 +37,7 @@ impl Command for Scope { &[], engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-lang/src/core_commands/use_.rs b/crates/nu-cmd-lang/src/core_commands/use_.rs index 32978d7e62..b0f3648304 100644 --- a/crates/nu-cmd-lang/src/core_commands/use_.rs +++ b/crates/nu-cmd-lang/src/core_commands/use_.rs @@ -1,7 +1,10 @@ use nu_engine::{ command_prelude::*, find_in_dirs_env, get_dirs_var_from_call, get_eval_block, redirect_env, }; -use nu_protocol::ast::{Expr, Expression}; +use nu_protocol::{ + ast::{Expr, Expression}, + engine::CommandType, +}; #[derive(Clone)] pub struct Use; @@ -40,8 +43,8 @@ This command is a parser keyword. For details, check: https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-plugin/src/commands/plugin/mod.rs b/crates/nu-cmd-plugin/src/commands/plugin/mod.rs index 87daa5a328..cf4ee5d9a3 100644 --- a/crates/nu-cmd-plugin/src/commands/plugin/mod.rs +++ b/crates/nu-cmd-plugin/src/commands/plugin/mod.rs @@ -43,7 +43,7 @@ impl Command for PluginCommand { &PluginCommand.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-cmd-plugin/src/commands/plugin/use_.rs b/crates/nu-cmd-plugin/src/commands/plugin/use_.rs index e5997efcf0..3cfb28f28b 100644 --- a/crates/nu-cmd-plugin/src/commands/plugin/use_.rs +++ b/crates/nu-cmd-plugin/src/commands/plugin/use_.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct PluginUse; @@ -52,8 +53,8 @@ it was already previously registered with `plugin add`. vec!["add", "register", "scope"] } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-cmd-plugin/src/commands/register.rs b/crates/nu-cmd-plugin/src/commands/register.rs index 924ab00d62..2c10456db7 100644 --- a/crates/nu-cmd-plugin/src/commands/register.rs +++ b/crates/nu-cmd-plugin/src/commands/register.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct Register; @@ -48,8 +49,8 @@ This command is a parser keyword. 
For details, check: vec!["add"] } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-command/src/bytes/bytes_.rs b/crates/nu-command/src/bytes/bytes_.rs index f262e6a82e..451ee1e5d5 100644 --- a/crates/nu-command/src/bytes/bytes_.rs +++ b/crates/nu-command/src/bytes/bytes_.rs @@ -35,7 +35,7 @@ impl Command for Bytes { &Bytes.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/conversions/into/command.rs b/crates/nu-command/src/conversions/into/command.rs index 37bbbff02e..5a8175b298 100644 --- a/crates/nu-command/src/conversions/into/command.rs +++ b/crates/nu-command/src/conversions/into/command.rs @@ -35,7 +35,7 @@ impl Command for Into { &[], engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/debug/view.rs b/crates/nu-command/src/debug/view.rs index 38a4efc2e7..fcbc377e4b 100644 --- a/crates/nu-command/src/debug/view.rs +++ b/crates/nu-command/src/debug/view.rs @@ -35,7 +35,7 @@ impl Command for View { &View.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/debug/view_source.rs b/crates/nu-command/src/debug/view_source.rs index 974a92e1ee..54bca55956 100644 --- a/crates/nu-command/src/debug/view_source.rs +++ b/crates/nu-command/src/debug/view_source.rs @@ -55,7 +55,7 @@ impl Command for ViewSource { } } // gets vector of positionals. - else if let Some(block_id) = decl.get_block_id() { + else if let Some(block_id) = decl.block_id() { let block = engine_state.get_block(block_id); if let Some(block_span) = block.span { let contents = engine_state.get_span_contents(block_span); diff --git a/crates/nu-command/src/env/config/config_.rs b/crates/nu-command/src/env/config/config_.rs index 30285c5c9e..948e8248b8 100644 --- a/crates/nu-command/src/env/config/config_.rs +++ b/crates/nu-command/src/env/config/config_.rs @@ -35,7 +35,7 @@ impl Command for ConfigMeta { &ConfigMeta.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/filesystem/open.rs b/crates/nu-command/src/filesystem/open.rs index 5fb8527511..842eaa5f4c 100644 --- a/crates/nu-command/src/filesystem/open.rs +++ b/crates/nu-command/src/filesystem/open.rs @@ -172,7 +172,7 @@ impl Command for Open { match converter { Some((converter_id, ext)) => { let decl = engine_state.get_decl(converter_id); - let command_output = if let Some(block_id) = decl.get_block_id() { + let command_output = if let Some(block_id) = decl.block_id() { let block = engine_state.get_block(block_id); eval_block(engine_state, stack, block, stream) } else { diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index 1be74665b2..340ceb4f62 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -393,7 +393,7 @@ fn convert_to_extension( ) -> Result { if let Some(decl_id) = engine_state.find_decl(format!("to {extension}").as_bytes(), &[]) { let decl = engine_state.get_decl(decl_id); - if let Some(block_id) = decl.get_block_id() { + if let Some(block_id) = decl.block_id() { let block = engine_state.get_block(block_id); let eval_block = get_eval_block(engine_state); eval_block(engine_state, stack, block, input) diff --git a/crates/nu-command/src/formats/from/command.rs 
b/crates/nu-command/src/formats/from/command.rs index ce5987e5b1..3df3d86e2e 100644 --- a/crates/nu-command/src/formats/from/command.rs +++ b/crates/nu-command/src/formats/from/command.rs @@ -35,7 +35,7 @@ impl Command for From { &From.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/formats/to/command.rs b/crates/nu-command/src/formats/to/command.rs index 26c9a259b6..1288c2d73b 100644 --- a/crates/nu-command/src/formats/to/command.rs +++ b/crates/nu-command/src/formats/to/command.rs @@ -35,7 +35,7 @@ impl Command for To { &To.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/hash/hash_.rs b/crates/nu-command/src/hash/hash_.rs index e3b19624a2..fc7f58cd3b 100644 --- a/crates/nu-command/src/hash/hash_.rs +++ b/crates/nu-command/src/hash/hash_.rs @@ -35,7 +35,7 @@ impl Command for Hash { &Self.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/help/help_commands.rs b/crates/nu-command/src/help/help_commands.rs index bc0fd92d92..2633595356 100644 --- a/crates/nu-command/src/help/help_commands.rs +++ b/crates/nu-command/src/help/help_commands.rs @@ -1,6 +1,7 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, get_full_help}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct HelpCommands; @@ -90,9 +91,15 @@ pub fn help_commands( let output = engine_state .get_signatures_with_examples(false) .iter() - .filter(|(signature, _, _, _, _)| signature.name == name) - .map(|(signature, examples, _, _, is_parser_keyword)| { - get_full_help(signature, examples, engine_state, stack, *is_parser_keyword) + .filter(|(signature, _, _)| signature.name == name) + .map(|(signature, examples, cmd_type)| { + get_full_help( + signature, + examples, + engine_state, + stack, + cmd_type == &CommandType::Keyword, + ) }) .collect::>(); diff --git a/crates/nu-command/src/help/help_externs.rs b/crates/nu-command/src/help/help_externs.rs index 22fb4a303c..0378553463 100644 --- a/crates/nu-command/src/help/help_externs.rs +++ b/crates/nu-command/src/help/help_externs.rs @@ -1,6 +1,7 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, get_full_help, scope::ScopeData}; +use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct HelpExterns; @@ -110,9 +111,15 @@ pub fn help_externs( let output = engine_state .get_signatures_with_examples(false) .iter() - .filter(|(signature, _, _, _, _)| signature.name == name) - .map(|(signature, examples, _, _, is_parser_keyword)| { - get_full_help(signature, examples, engine_state, stack, *is_parser_keyword) + .filter(|(signature, _, _)| signature.name == name) + .map(|(signature, examples, cmd_type)| { + get_full_help( + signature, + examples, + engine_state, + stack, + cmd_type == &CommandType::Keyword, + ) }) .collect::>(); diff --git a/crates/nu-command/src/math/math_.rs b/crates/nu-command/src/math/math_.rs index a4a146738f..9f230362ea 100644 --- a/crates/nu-command/src/math/math_.rs +++ b/crates/nu-command/src/math/math_.rs @@ -35,7 +35,7 @@ impl Command for MathCommand { &MathCommand.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/misc/source.rs b/crates/nu-command/src/misc/source.rs index 
798b321c6b..08a979b9f5 100644 --- a/crates/nu-command/src/misc/source.rs +++ b/crates/nu-command/src/misc/source.rs @@ -1,4 +1,5 @@ use nu_engine::{command_prelude::*, get_eval_block_with_early_return}; +use nu_protocol::engine::CommandType; /// Source a file for environment variables. #[derive(Clone)] @@ -29,8 +30,8 @@ impl Command for Source { https://www.nushell.sh/book/thinking_in_nu.html"# } - fn is_parser_keyword(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Keyword } fn run( diff --git a/crates/nu-command/src/network/http/http_.rs b/crates/nu-command/src/network/http/http_.rs index 15bc96494c..b1e8d64120 100644 --- a/crates/nu-command/src/network/http/http_.rs +++ b/crates/nu-command/src/network/http/http_.rs @@ -41,7 +41,7 @@ impl Command for Http { &Http.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/network/url/url_.rs b/crates/nu-command/src/network/url/url_.rs index 9f795c7eab..49d55c4c6b 100644 --- a/crates/nu-command/src/network/url/url_.rs +++ b/crates/nu-command/src/network/url/url_.rs @@ -39,7 +39,7 @@ impl Command for Url { &Url.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/path/path_.rs b/crates/nu-command/src/path/path_.rs index 19351d590a..2d1d0730fa 100644 --- a/crates/nu-command/src/path/path_.rs +++ b/crates/nu-command/src/path/path_.rs @@ -48,7 +48,7 @@ the path literal."# &PathCommand.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/random/random_.rs b/crates/nu-command/src/random/random_.rs index 21819fa24f..16135000ea 100644 --- a/crates/nu-command/src/random/random_.rs +++ b/crates/nu-command/src/random/random_.rs @@ -39,7 +39,7 @@ impl Command for RandomCommand { &RandomCommand.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/stor/stor_.rs b/crates/nu-command/src/stor/stor_.rs index e736fd8357..3fb6840e7d 100644 --- a/crates/nu-command/src/stor/stor_.rs +++ b/crates/nu-command/src/stor/stor_.rs @@ -35,7 +35,7 @@ impl Command for Stor { &Stor.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/strings/format/format_.rs b/crates/nu-command/src/strings/format/format_.rs index 18159b610f..c51213e4ef 100644 --- a/crates/nu-command/src/strings/format/format_.rs +++ b/crates/nu-command/src/strings/format/format_.rs @@ -35,7 +35,7 @@ impl Command for Format { &Format.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/strings/split/command.rs b/crates/nu-command/src/strings/split/command.rs index cb52cdb44c..0333249c2b 100644 --- a/crates/nu-command/src/strings/split/command.rs +++ b/crates/nu-command/src/strings/split/command.rs @@ -35,7 +35,7 @@ impl Command for SplitCommand { &SplitCommand.examples(), engine_state, stack, - self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/strings/str_/case/str_.rs b/crates/nu-command/src/strings/str_/case/str_.rs index cf4537f046..56e0d1164f 100644 --- a/crates/nu-command/src/strings/str_/case/str_.rs +++ b/crates/nu-command/src/strings/str_/case/str_.rs @@ -35,7 +35,7 @@ impl Command for Str { &Str.examples(), engine_state, stack, - 
self.is_parser_keyword(), + self.is_keyword(), ), call.head, ) diff --git a/crates/nu-command/src/system/which_.rs b/crates/nu-command/src/system/which_.rs index 1244a57d99..81b6057864 100644 --- a/crates/nu-command/src/system/which_.rs +++ b/crates/nu-command/src/system/which_.rs @@ -1,5 +1,5 @@ -use log::trace; use nu_engine::{command_prelude::*, env}; +use nu_protocol::engine::CommandType; use std::{ffi::OsStr, path::Path}; #[derive(Clone)] @@ -51,14 +51,14 @@ impl Command for Which { fn entry( arg: impl Into, path: impl Into, - cmd_type: impl Into, + cmd_type: CommandType, span: Span, ) -> Value { Value::record( record! { - "command" => Value::string(arg.into(), span), - "path" => Value::string(path.into(), span), - "type" => Value::string(cmd_type.into(), span), + "command" => Value::string(arg, span), + "path" => Value::string(path, span), + "type" => Value::string(cmd_type.to_string(), span), }, span, ) @@ -66,17 +66,8 @@ fn entry( fn get_entry_in_commands(engine_state: &EngineState, name: &str, span: Span) -> Option { if let Some(decl_id) = engine_state.find_decl(name.as_bytes(), &[]) { - let cmd_type = if engine_state.get_decl(decl_id).is_custom_command() { - "custom" - } else if engine_state.get_decl(decl_id).is_alias() { - "alias" - } else { - "built-in" - }; - - trace!("Found command: {}", name); - - Some(entry(name, "", cmd_type, span)) + let decl = engine_state.get_decl(decl_id); + Some(entry(name, "", decl.command_type(), span)) } else { None } @@ -109,7 +100,7 @@ fn get_first_entry_in_path( paths: impl AsRef, ) -> Option { which::which_in(item, Some(paths), cwd) - .map(|path| entry(item, path.to_string_lossy().to_string(), "external", span)) + .map(|path| entry(item, path.to_string_lossy(), CommandType::External, span)) .ok() } @@ -132,7 +123,7 @@ fn get_all_entries_in_path( ) -> Vec { which::which_in_all(&item, Some(paths), cwd) .map(|iter| { - iter.map(|path| entry(item, path.to_string_lossy().to_string(), "external", span)) + iter.map(|path| entry(item, path.to_string_lossy(), CommandType::External, span)) .collect() }) .unwrap_or_default() diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index 0bc0c3727c..02feef3f38 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -36,10 +36,10 @@ pub fn eval_call( &decl.examples(), engine_state, caller_stack, - decl.is_parser_keyword(), + decl.is_keyword(), ); Ok(Value::string(full_help, call.head).into_pipeline_data()) - } else if let Some(block_id) = decl.get_block_id() { + } else if let Some(block_id) = decl.block_id() { let block = engine_state.get_block(block_id); let mut callee_stack = caller_stack.gather_captures(engine_state, &block.captures); diff --git a/crates/nu-engine/src/scope.rs b/crates/nu-engine/src/scope.rs index 1f5bf2a358..b6a43ca47c 100644 --- a/crates/nu-engine/src/scope.rs +++ b/crates/nu-engine/src/scope.rs @@ -111,13 +111,8 @@ impl<'e, 's> ScopeData<'e, 's> { "signatures" => self.collect_signatures(&signature, span), "usage" => Value::string(decl.usage(), span), "examples" => Value::list(examples, span), - // we can only be a is_builtin or is_custom, not both - "is_builtin" => Value::bool(!decl.is_custom_command(), span), + "type" => Value::string(decl.command_type().to_string(), span), "is_sub" => Value::bool(decl.is_sub(), span), - "is_plugin" => Value::bool(decl.is_plugin(), span), - "is_custom" => Value::bool(decl.is_custom_command(), span), - "is_keyword" => Value::bool(decl.is_parser_keyword(), span), - "is_extern" => 
Value::bool(decl.is_known_external(), span), "creates_scope" => Value::bool(signature.creates_scope, span), "extra_usage" => Value::string(decl.extra_usage(), span), "search_terms" => Value::string(decl.search_terms().join(", "), span), diff --git a/crates/nu-lsp/src/lib.rs b/crates/nu-lsp/src/lib.rs index 47535d9bd4..44eeeb5756 100644 --- a/crates/nu-lsp/src/lib.rs +++ b/crates/nu-lsp/src/lib.rs @@ -279,7 +279,7 @@ impl LanguageServer { match id { Id::Declaration(decl_id) => { - if let Some(block_id) = working_set.get_decl(decl_id).get_block_id() { + if let Some(block_id) = working_set.get_decl(decl_id).block_id() { let block = working_set.get_block(block_id); if let Some(span) = &block.span { for cached_file in working_set.files() { diff --git a/crates/nu-parser/src/known_external.rs b/crates/nu-parser/src/known_external.rs index d5cc1f2369..1463a3b080 100644 --- a/crates/nu-parser/src/known_external.rs +++ b/crates/nu-parser/src/known_external.rs @@ -1,5 +1,8 @@ use nu_engine::command_prelude::*; -use nu_protocol::ast::{Argument, Expr, Expression}; +use nu_protocol::{ + ast::{Argument, Expr, Expression}, + engine::CommandType, +}; #[derive(Clone)] pub struct KnownExternal { @@ -22,12 +25,8 @@ impl Command for KnownExternal { &self.usage } - fn is_known_external(&self) -> bool { - true - } - - fn is_builtin(&self) -> bool { - false + fn command_type(&self) -> CommandType { + CommandType::External } fn run( diff --git a/crates/nu-parser/src/parse_keywords.rs b/crates/nu-parser/src/parse_keywords.rs index 015873e69a..25a55c8489 100644 --- a/crates/nu-parser/src/parse_keywords.rs +++ b/crates/nu-parser/src/parse_keywords.rs @@ -1020,7 +1020,7 @@ pub fn parse_alias( } => { let cmd = working_set.get_decl(rhs_call.decl_id); - if cmd.is_parser_keyword() + if cmd.is_keyword() && !ALIASABLE_PARSER_KEYWORDS.contains(&cmd.name().as_bytes()) { working_set.error(ParseError::CantAliasKeyword( diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index fb36e8b503..61a4261d8d 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -5966,7 +5966,7 @@ pub fn discover_captures_in_expr( Expr::Bool(_) => {} Expr::Call(call) => { let decl = working_set.get_decl(call.decl_id); - if let Some(block_id) = decl.get_block_id() { + if let Some(block_id) = decl.block_id() { match seen_blocks.get(&block_id) { Some(capture_list) => { // Push captures onto the outer closure that aren't created by that outer closure diff --git a/crates/nu-plugin-engine/src/declaration.rs b/crates/nu-plugin-engine/src/declaration.rs index 7f45ce1507..d48fa39b85 100644 --- a/crates/nu-plugin-engine/src/declaration.rs +++ b/crates/nu-plugin-engine/src/declaration.rs @@ -1,6 +1,6 @@ use nu_engine::{command_prelude::*, get_eval_expression}; use nu_plugin_protocol::{CallInfo, EvaluatedCall}; -use nu_protocol::{PluginIdentity, PluginSignature}; +use nu_protocol::{engine::CommandType, PluginIdentity, PluginSignature}; use std::sync::Arc; use crate::{GetPlugin, PluginExecutionCommandContext, PluginSource}; @@ -116,8 +116,8 @@ impl Command for PluginDeclaration { ) } - fn is_plugin(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Plugin } fn plugin_identity(&self) -> Option<&PluginIdentity> { diff --git a/crates/nu-protocol/src/alias.rs b/crates/nu-protocol/src/alias.rs index 47b7e0fd9e..24448225d4 100644 --- a/crates/nu-protocol/src/alias.rs +++ b/crates/nu-protocol/src/alias.rs @@ -1,6 +1,6 @@ use crate::{ ast::{Call, Expression}, - engine::{Command, EngineState, 
Stack}, + engine::{Command, CommandType, EngineState, Stack}, PipelineData, ShellError, Signature, }; @@ -48,8 +48,8 @@ impl Command for Alias { }) } - fn is_alias(&self) -> bool { - true + fn command_type(&self) -> CommandType { + CommandType::Alias } fn as_alias(&self) -> Option<&Alias> { diff --git a/crates/nu-protocol/src/engine/command.rs b/crates/nu-protocol/src/engine/command.rs index 38119d6f8f..043d2a66c7 100644 --- a/crates/nu-protocol/src/engine/command.rs +++ b/crates/nu-protocol/src/engine/command.rs @@ -1,8 +1,8 @@ -use crate::{ast::Call, Alias, BlockId, Example, OutDest, PipelineData, ShellError, Signature}; - use super::{EngineState, Stack, StateWorkingSet}; +use crate::{ast::Call, Alias, BlockId, Example, OutDest, PipelineData, ShellError, Signature}; +use std::fmt::Display; -#[derive(Clone, Debug, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CommandType { Builtin, Custom, @@ -10,7 +10,20 @@ pub enum CommandType { External, Alias, Plugin, - Other, +} + +impl Display for CommandType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let str = match self { + CommandType::Builtin => "built-in", + CommandType::Custom => "custom", + CommandType::Keyword => "keyword", + CommandType::External => "external", + CommandType::Alias => "alias", + CommandType::Plugin => "plugin", + }; + write!(f, "{str}") + } } pub trait Command: Send + Sync + CommandClone { @@ -49,49 +62,29 @@ pub trait Command: Send + Sync + CommandClone { Vec::new() } - // This is a built-in command - fn is_builtin(&self) -> bool { - true + // Related terms to help with command search + fn search_terms(&self) -> Vec<&str> { + vec![] } - // This is a signature for a known external command - fn is_known_external(&self) -> bool { + // Whether can run in const evaluation in the parser + fn is_const(&self) -> bool { false } - // This is an alias of another command - fn is_alias(&self) -> bool { - false - } - - // Return reference to the command as Alias - fn as_alias(&self) -> Option<&Alias> { - None - } - - // This is an enhanced method to determine if a command is custom command or not - // since extern "foo" [] and def "foo" [] behaves differently - fn is_custom_command(&self) -> bool { - if self.get_block_id().is_some() { - true - } else { - self.is_known_external() - } - } - // Is a sub command fn is_sub(&self) -> bool { self.name().contains(' ') } - // Is a parser keyword (source, def, etc.) - fn is_parser_keyword(&self) -> bool { - false + // If command is a block i.e. def blah [] { }, get the block id + fn block_id(&self) -> Option { + None } - /// Is a plugin command - fn is_plugin(&self) -> bool { - false + // Return reference to the command as Alias + fn as_alias(&self) -> Option<&Alias> { + None } /// The identity of the plugin, if this is a plugin command @@ -100,38 +93,32 @@ pub trait Command: Send + Sync + CommandClone { None } - // Whether can run in const evaluation in the parser - fn is_const(&self) -> bool { - false - } - - // If command is a block i.e. 
def blah [] { }, get the block id - fn get_block_id(&self) -> Option { - None - } - - // Related terms to help with command search - fn search_terms(&self) -> Vec<&str> { - vec![] - } - fn command_type(&self) -> CommandType { - match ( - self.is_builtin(), - self.is_custom_command(), - self.is_parser_keyword(), - self.is_known_external(), - self.is_alias(), - self.is_plugin(), - ) { - (true, false, false, false, false, false) => CommandType::Builtin, - (true, true, false, false, false, false) => CommandType::Custom, - (true, false, true, false, false, false) => CommandType::Keyword, - (false, true, false, true, false, false) => CommandType::External, - (_, _, _, _, true, _) => CommandType::Alias, - (true, false, false, false, false, true) => CommandType::Plugin, - _ => CommandType::Other, - } + CommandType::Builtin + } + + fn is_builtin(&self) -> bool { + self.command_type() == CommandType::Builtin + } + + fn is_custom(&self) -> bool { + self.command_type() == CommandType::Custom + } + + fn is_keyword(&self) -> bool { + self.command_type() == CommandType::Keyword + } + + fn is_known_external(&self) -> bool { + self.command_type() == CommandType::External + } + + fn is_alias(&self) -> bool { + self.command_type() == CommandType::Alias + } + + fn is_plugin(&self) -> bool { + self.command_type() == CommandType::Plugin } fn pipe_redirection(&self) -> (Option, Option) { diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index bea49b5d6c..1948b67d43 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -794,7 +794,7 @@ impl EngineState { } pub fn get_signature(&self, decl: &dyn Command) -> Signature { - if let Some(block_id) = decl.get_block_id() { + if let Some(block_id) = decl.block_id() { *self.blocks[block_id].signature.clone() } else { decl.signature() @@ -814,26 +814,16 @@ impl EngineState { /// Get signatures of all commands within scope. /// - /// In addition to signatures, it returns whether each command is: - /// a) a plugin - /// b) custom + /// In addition to signatures, it returns each command's examples and type. 
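For a one-place view of the shape of this refactor, here is a simplified, self-contained sketch (these are stand-in names, not the actual `nu-protocol` definitions): a single `command_type()` method now drives both the displayed type name and the boolean predicates, so an implementor overrides exactly one thing.

```rust
use std::fmt;

#[derive(Clone, Copy, PartialEq, Eq)]
enum CommandType {
    Builtin,
    Custom,
    Keyword,
    External,
    Alias,
    Plugin,
}

impl fmt::Display for CommandType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Same strings as the ones `scope commands` and `which` now display.
        let s = match self {
            CommandType::Builtin => "built-in",
            CommandType::Custom => "custom",
            CommandType::Keyword => "keyword",
            CommandType::External => "external",
            CommandType::Alias => "alias",
            CommandType::Plugin => "plugin",
        };
        write!(f, "{s}")
    }
}

trait Command {
    // The single source of truth; defaults to a built-in command.
    fn command_type(&self) -> CommandType {
        CommandType::Builtin
    }

    // The old standalone flags become thin wrappers over `command_type()`.
    fn is_alias(&self) -> bool {
        self.command_type() == CommandType::Alias
    }

    fn is_keyword(&self) -> bool {
        self.command_type() == CommandType::Keyword
    }
}

struct MyAlias;

impl Command for MyAlias {
    fn command_type(&self) -> CommandType {
        CommandType::Alias
    }
}

fn main() {
    let cmd = MyAlias;
    // Prints: alias true false
    println!("{} {} {}", cmd.command_type(), cmd.is_alias(), cmd.is_keyword());
}
```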
pub fn get_signatures_with_examples( &self, include_hidden: bool, - ) -> Vec<(Signature, Vec, bool, bool, bool)> { + ) -> Vec<(Signature, Vec, CommandType)> { self.get_decls_sorted(include_hidden) .map(|(_, id)| { let decl = self.get_decl(id); - let signature = self.get_signature(decl).update_from_command(decl); - - ( - signature, - decl.examples(), - decl.is_plugin(), - decl.get_block_id().is_some(), - decl.is_parser_keyword(), - ) + (signature, decl.examples(), decl.command_type()) }) .collect() } diff --git a/crates/nu-protocol/src/pipeline/pipeline_data.rs b/crates/nu-protocol/src/pipeline/pipeline_data.rs index 0f4d1eb826..7faa4ed221 100644 --- a/crates/nu-protocol/src/pipeline/pipeline_data.rs +++ b/crates/nu-protocol/src/pipeline/pipeline_data.rs @@ -550,7 +550,7 @@ impl PipelineData { // to create the table value that will be printed in the terminal if let Some(decl_id) = engine_state.table_decl_id { let command = engine_state.get_decl(decl_id); - if command.get_block_id().is_some() { + if command.block_id().is_some() { self.write_all_and_flush(engine_state, no_newline, to_stderr) } else { let call = Call::new(Span::new(0, 0)); diff --git a/crates/nu-protocol/src/signature.rs b/crates/nu-protocol/src/signature.rs index 7f3a48cc35..70e94b35f1 100644 --- a/crates/nu-protocol/src/signature.rs +++ b/crates/nu-protocol/src/signature.rs @@ -1,6 +1,6 @@ use crate::{ ast::Call, - engine::{Command, EngineState, Stack}, + engine::{Command, CommandType, EngineState, Stack}, BlockId, PipelineData, ShellError, SyntaxShape, Type, Value, VarId, }; use serde::{Deserialize, Serialize}; @@ -703,7 +703,11 @@ impl Command for BlockCommand { }) } - fn get_block_id(&self) -> Option { + fn command_type(&self) -> CommandType { + CommandType::Custom + } + + fn block_id(&self) -> Option { Some(self.block_id) } } diff --git a/src/ide.rs b/src/ide.rs index a3474fe3b6..0a24bcd013 100644 --- a/src/ide.rs +++ b/src/ide.rs @@ -145,7 +145,7 @@ pub fn goto_def(engine_state: &mut EngineState, file_path: &str, location: &Valu match find_id(&mut working_set, file_path, &file, location) { Some((Id::Declaration(decl_id), ..)) => { let result = working_set.get_decl(decl_id); - if let Some(block_id) = result.get_block_id() { + if let Some(block_id) = result.block_id() { let block = working_set.get_block(block_id); if let Some(span) = &block.span { for file in working_set.files() { From 474293bf1cfe1efdf1312de93710f2b741e5260a Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Sun, 19 May 2024 15:35:07 +0000 Subject: [PATCH 052/137] Clear environment for child `Command`s (#12901) # Description There is a bug when `hide-env` is used on environment variables that were present at shell startup. Namely, child processes still inherit the hidden environment variable. This PR fixes #12900, fixes #11495, and fixes #7937. # Tests + Formatting Added a test. --- crates/nu-command/src/system/run_external.rs | 4 ++++ tests/shell/environment/env.rs | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index 2941d80de3..b12b89263c 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -530,6 +530,9 @@ impl ExternalCommand { } /// Spawn a command without shelling out to an external shell + /// + /// Note that this function will not set the cwd or environment variables. + /// It only creates the command and adds arguments. 
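To make the intent of the fix concrete, here is a stand-alone sketch of the pattern using only `std::process` (the variable names and the placeholder command are illustrative, not taken from this code): the child starts from an empty environment and only receives what the caller explicitly re-adds, so variables removed with `hide-env` never reach it.

```rust
use std::collections::HashMap;
use std::process::Command;

fn main() {
    // Pretend this is the engine's current, visible environment
    // (i.e. with anything removed by `hide-env` already filtered out).
    let visible_env: HashMap<String, String> =
        [("FOO".to_string(), "bar".to_string())].into_iter().collect();

    let mut child = Command::new("some-external-command");
    // Drop everything inherited from the parent process...
    child.env_clear();
    // ...then re-add only the variables the shell still considers visible.
    child.envs(&visible_env);

    // The child will see exactly `visible_env`, not the parent's full environment.
    for (key, value) in child.get_envs() {
        println!("{:?} = {:?}", key, value);
    }
}
```

The test added below exercises exactly this: a variable hidden with `hide-env` is no longer visible to a nested `nu` invocation.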
pub fn spawn_simple_command(&self, cwd: &str) -> Result { let (head, _, _) = trim_enclosing_quotes(&self.name.item); let head = nu_path::expand_to_real_path(head) @@ -537,6 +540,7 @@ impl ExternalCommand { .to_string(); let mut process = std::process::Command::new(head); + process.env_clear(); for (arg, arg_keep_raw) in self.args.iter().zip(self.arg_keep_raw.iter()) { trim_expand_and_apply_arg(&mut process, arg, arg_keep_raw, cwd); diff --git a/tests/shell/environment/env.rs b/tests/shell/environment/env.rs index 7061761c70..74736415a3 100644 --- a/tests/shell/environment/env.rs +++ b/tests/shell/environment/env.rs @@ -126,6 +126,15 @@ fn passes_with_env_env_var_to_external_process() { assert_eq!(actual.out, "foo"); } +#[test] +fn hides_environment_from_child() { + let actual = nu!(r#" + $env.TEST = 1; ^$nu.current-exe -c "hide-env TEST; ^$nu.current-exe -c '$env.TEST'" + "#); + assert!(actual.out.is_empty()); + assert!(actual.err.contains("cannot find column")); +} + #[test] fn has_file_pwd() { Playground::setup("has_file_pwd", |dirs, sandbox| { From baeba19b22c08e7a4c6cf3f1ff4b37df0c54987a Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Sun, 19 May 2024 17:56:33 +0000 Subject: [PATCH 053/137] Make `get_full_help` take `&dyn Command` (#12903) # Description Changes `get_full_help` to take a `&dyn Command` instead of multiple arguments (`&Signature`, `&Examples` `is_parser_keyword`). All of these arguments can be gathered from a `Command`, so there is no need to pass the pieces to `get_full_help`. This PR also fixes an issue where the search terms are not shown if `--help` is used on a command. --- crates/nu-cli/src/commands/keybindings.rs | 12 +---- crates/nu-cli/src/menus/help_completions.rs | 44 +++++++++---------- crates/nu-cmd-dataframe/src/dataframe/stub.rs | 12 +---- crates/nu-cmd-extra/src/extra/bits/bits_.rs | 12 +---- .../src/extra/filters/roll/roll_.rs | 12 +---- .../src/extra/strings/str_/case/str_.rs | 12 +---- .../nu-cmd-lang/src/core_commands/export.rs | 12 +---- .../src/core_commands/overlay/command.rs | 12 +---- .../src/core_commands/scope/command.rs | 12 +---- .../nu-cmd-plugin/src/commands/plugin/mod.rs | 12 +---- crates/nu-command/src/bytes/bytes_.rs | 12 +---- .../src/conversions/into/command.rs | 12 +---- crates/nu-command/src/date/date_.rs | 22 +--------- crates/nu-command/src/debug/view.rs | 12 +---- crates/nu-command/src/env/config/config_.rs | 12 +---- crates/nu-command/src/formats/from/command.rs | 12 +---- crates/nu-command/src/formats/to/command.rs | 12 +---- crates/nu-command/src/hash/hash_.rs | 12 +---- crates/nu-command/src/help/help_commands.rs | 18 +++----- crates/nu-command/src/help/help_externs.rs | 18 +++----- crates/nu-command/src/help/help_modules.rs | 2 + crates/nu-command/src/math/math_.rs | 12 +---- crates/nu-command/src/network/http/http_.rs | 12 +---- crates/nu-command/src/network/url/url_.rs | 12 +---- crates/nu-command/src/path/path_.rs | 12 +---- crates/nu-command/src/random/random_.rs | 12 +---- crates/nu-command/src/stor/stor_.rs | 12 +---- .../nu-command/src/strings/format/format_.rs | 12 +---- .../nu-command/src/strings/split/command.rs | 12 +---- .../nu-command/src/strings/str_/case/str_.rs | 12 +---- crates/nu-engine/src/documentation.rs | 14 +++--- crates/nu-engine/src/eval.rs | 14 +----- crates/nu-plugin-engine/src/context.rs | 10 ++--- crates/nu-protocol/src/engine/engine_state.rs | 26 ++--------- src/command.rs | 22 ++-------- tests/repl/test_engine.rs | 5 +-- 36 files changed, 82 insertions(+), 413 deletions(-) diff --git 
a/crates/nu-cli/src/commands/keybindings.rs b/crates/nu-cli/src/commands/keybindings.rs index a8c8053a56..347ce983ea 100644 --- a/crates/nu-cli/src/commands/keybindings.rs +++ b/crates/nu-cli/src/commands/keybindings.rs @@ -36,16 +36,6 @@ For more information on input and keybindings, check: call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Keybindings.signature(), - &Keybindings.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cli/src/menus/help_completions.rs b/crates/nu-cli/src/menus/help_completions.rs index 62f40b9d8d..c9c1b7bf94 100644 --- a/crates/nu-cli/src/menus/help_completions.rs +++ b/crates/nu-cli/src/menus/help_completions.rs @@ -12,50 +12,49 @@ impl NuHelpCompleter { } fn completion_helper(&self, line: &str, pos: usize) -> Vec { - let full_commands = self.0.get_signatures_with_examples(false); let folded_line = line.to_folded_case(); - //Vec<(Signature, Vec, bool, bool)> { - let mut commands = full_commands - .iter() - .filter(|(sig, _, _)| { - sig.name.to_folded_case().contains(&folded_line) - || sig.usage.to_folded_case().contains(&folded_line) - || sig - .search_terms - .iter() + let mut commands = self + .0 + .get_decls_sorted(false) + .into_iter() + .filter_map(|(_, decl_id)| { + let decl = self.0.get_decl(decl_id); + (decl.name().to_folded_case().contains(&folded_line) + || decl.usage().to_folded_case().contains(&folded_line) + || decl + .search_terms() + .into_iter() .any(|term| term.to_folded_case().contains(&folded_line)) - || sig.extra_usage.to_folded_case().contains(&folded_line) + || decl.extra_usage().to_folded_case().contains(&folded_line)) + .then_some(decl) }) .collect::>(); - commands.sort_by(|(a, _, _), (b, _, _)| { - let a_distance = levenshtein_distance(line, &a.name); - let b_distance = levenshtein_distance(line, &b.name); - a_distance.cmp(&b_distance) - }); + commands.sort_by_cached_key(|decl| levenshtein_distance(line, decl.name())); commands .into_iter() - .map(|(sig, examples, _)| { + .map(|decl| { let mut long_desc = String::new(); - let usage = &sig.usage; + let usage = decl.usage(); if !usage.is_empty() { long_desc.push_str(usage); long_desc.push_str("\r\n\r\n"); } - let extra_usage = &sig.extra_usage; + let extra_usage = decl.extra_usage(); if !extra_usage.is_empty() { long_desc.push_str(extra_usage); long_desc.push_str("\r\n\r\n"); } + let sig = decl.signature(); let _ = write!(long_desc, "Usage:\r\n > {}\r\n", sig.call_signature()); if !sig.named.is_empty() { - long_desc.push_str(&get_flags_section(Some(&*self.0.clone()), sig, |v| { + long_desc.push_str(&get_flags_section(Some(&*self.0.clone()), &sig, |v| { v.to_parsable_string(", ", &self.0.config) })) } @@ -93,13 +92,14 @@ impl NuHelpCompleter { } } - let extra: Vec = examples + let extra: Vec = decl + .examples() .iter() .map(|example| example.example.replace('\n', "\r\n")) .collect(); Suggestion { - value: sig.name.clone(), + value: decl.name().into(), description: Some(long_desc), style: None, extra: Some(extra), diff --git a/crates/nu-cmd-dataframe/src/dataframe/stub.rs b/crates/nu-cmd-dataframe/src/dataframe/stub.rs index 58dd2996cd..dfabbe0b82 100644 --- a/crates/nu-cmd-dataframe/src/dataframe/stub.rs +++ b/crates/nu-cmd-dataframe/src/dataframe/stub.rs @@ -29,16 +29,6 @@ impl Command for Dfr { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - 
get_full_help( - &Dfr.signature(), - &Dfr.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cmd-extra/src/extra/bits/bits_.rs b/crates/nu-cmd-extra/src/extra/bits/bits_.rs index d795beda4e..1190c01b4d 100644 --- a/crates/nu-cmd-extra/src/extra/bits/bits_.rs +++ b/crates/nu-cmd-extra/src/extra/bits/bits_.rs @@ -29,16 +29,6 @@ impl Command for Bits { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Bits.signature(), - &Bits.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs b/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs index a1622d71c0..867bc2706e 100644 --- a/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs +++ b/crates/nu-cmd-extra/src/extra/filters/roll/roll_.rs @@ -33,16 +33,6 @@ impl Command for Roll { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Roll.signature(), - &Roll.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs b/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs index 56e0d1164f..fe6cb86324 100644 --- a/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs +++ b/crates/nu-cmd-extra/src/extra/strings/str_/case/str_.rs @@ -29,16 +29,6 @@ impl Command for Str { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Str.signature(), - &Str.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cmd-lang/src/core_commands/export.rs b/crates/nu-cmd-lang/src/core_commands/export.rs index 8634a8c06b..565e7895dc 100644 --- a/crates/nu-cmd-lang/src/core_commands/export.rs +++ b/crates/nu-cmd-lang/src/core_commands/export.rs @@ -35,17 +35,7 @@ impl Command for ExportCommand { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &ExportCommand.signature(), - &ExportCommand.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } fn examples(&self) -> Vec { diff --git a/crates/nu-cmd-lang/src/core_commands/overlay/command.rs b/crates/nu-cmd-lang/src/core_commands/overlay/command.rs index 72cc28e77c..00ec7438ad 100644 --- a/crates/nu-cmd-lang/src/core_commands/overlay/command.rs +++ b/crates/nu-cmd-lang/src/core_commands/overlay/command.rs @@ -37,16 +37,6 @@ impl Command for Overlay { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Overlay.signature(), - &[], - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cmd-lang/src/core_commands/scope/command.rs b/crates/nu-cmd-lang/src/core_commands/scope/command.rs 
index 72a9e74932..98439226cf 100644 --- a/crates/nu-cmd-lang/src/core_commands/scope/command.rs +++ b/crates/nu-cmd-lang/src/core_commands/scope/command.rs @@ -31,16 +31,6 @@ impl Command for Scope { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Scope.signature(), - &[], - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-cmd-plugin/src/commands/plugin/mod.rs b/crates/nu-cmd-plugin/src/commands/plugin/mod.rs index cf4ee5d9a3..36590e9a8a 100644 --- a/crates/nu-cmd-plugin/src/commands/plugin/mod.rs +++ b/crates/nu-cmd-plugin/src/commands/plugin/mod.rs @@ -37,17 +37,7 @@ impl Command for PluginCommand { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &PluginCommand.signature(), - &PluginCommand.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/bytes/bytes_.rs b/crates/nu-command/src/bytes/bytes_.rs index 451ee1e5d5..82bf7c619b 100644 --- a/crates/nu-command/src/bytes/bytes_.rs +++ b/crates/nu-command/src/bytes/bytes_.rs @@ -29,16 +29,6 @@ impl Command for Bytes { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Bytes.signature(), - &Bytes.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/conversions/into/command.rs b/crates/nu-command/src/conversions/into/command.rs index 5a8175b298..03b8e81c4a 100644 --- a/crates/nu-command/src/conversions/into/command.rs +++ b/crates/nu-command/src/conversions/into/command.rs @@ -29,16 +29,6 @@ impl Command for Into { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Into.signature(), - &[], - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/date/date_.rs b/crates/nu-command/src/date/date_.rs index 158940cc2e..fd67e9c923 100644 --- a/crates/nu-command/src/date/date_.rs +++ b/crates/nu-command/src/date/date_.rs @@ -42,26 +42,6 @@ impl Command for Date { call: &Call, _input: PipelineData, ) -> Result { - date(engine_state, stack, call) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } - -fn date( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let head = call.head; - - Ok(Value::string( - get_full_help( - &Date.signature(), - &Date.examples(), - engine_state, - stack, - false, - ), - head, - ) - .into_pipeline_data()) -} diff --git a/crates/nu-command/src/debug/view.rs b/crates/nu-command/src/debug/view.rs index fcbc377e4b..4ef1c1c7e0 100644 --- a/crates/nu-command/src/debug/view.rs +++ b/crates/nu-command/src/debug/view.rs @@ -29,16 +29,6 @@ impl Command for View { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &View.signature(), - &View.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + 
Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/env/config/config_.rs b/crates/nu-command/src/env/config/config_.rs index 948e8248b8..1cd6ef4621 100644 --- a/crates/nu-command/src/env/config/config_.rs +++ b/crates/nu-command/src/env/config/config_.rs @@ -29,17 +29,7 @@ impl Command for ConfigMeta { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &ConfigMeta.signature(), - &ConfigMeta.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } fn search_terms(&self) -> Vec<&str> { diff --git a/crates/nu-command/src/formats/from/command.rs b/crates/nu-command/src/formats/from/command.rs index 3df3d86e2e..40085b51d2 100644 --- a/crates/nu-command/src/formats/from/command.rs +++ b/crates/nu-command/src/formats/from/command.rs @@ -29,16 +29,6 @@ impl Command for From { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &From.signature(), - &From.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/formats/to/command.rs b/crates/nu-command/src/formats/to/command.rs index 1288c2d73b..2138085c87 100644 --- a/crates/nu-command/src/formats/to/command.rs +++ b/crates/nu-command/src/formats/to/command.rs @@ -29,16 +29,6 @@ impl Command for To { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &To.signature(), - &To.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/hash/hash_.rs b/crates/nu-command/src/hash/hash_.rs index fc7f58cd3b..d4eca79354 100644 --- a/crates/nu-command/src/hash/hash_.rs +++ b/crates/nu-command/src/hash/hash_.rs @@ -29,16 +29,6 @@ impl Command for Hash { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Self.signature(), - &Self.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/help/help_commands.rs b/crates/nu-command/src/help/help_commands.rs index 2633595356..f2440cc36f 100644 --- a/crates/nu-command/src/help/help_commands.rs +++ b/crates/nu-command/src/help/help_commands.rs @@ -1,7 +1,6 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, get_full_help}; -use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct HelpCommands; @@ -89,18 +88,13 @@ pub fn help_commands( } let output = engine_state - .get_signatures_with_examples(false) - .iter() - .filter(|(signature, _, _)| signature.name == name) - .map(|(signature, examples, cmd_type)| { - get_full_help( - signature, - examples, - engine_state, - stack, - cmd_type == &CommandType::Keyword, - ) + .get_decls_sorted(false) + .into_iter() + .filter_map(|(_, decl_id)| { + let decl = engine_state.get_decl(decl_id); + (decl.name() == name).then_some(decl) }) + .map(|cmd| get_full_help(cmd, engine_state, stack)) .collect::>(); if 
!output.is_empty() { diff --git a/crates/nu-command/src/help/help_externs.rs b/crates/nu-command/src/help/help_externs.rs index 0378553463..4a5c8123a4 100644 --- a/crates/nu-command/src/help/help_externs.rs +++ b/crates/nu-command/src/help/help_externs.rs @@ -1,7 +1,6 @@ use crate::help::highlight_search_in_table; use nu_color_config::StyleComputer; use nu_engine::{command_prelude::*, get_full_help, scope::ScopeData}; -use nu_protocol::engine::CommandType; #[derive(Clone)] pub struct HelpExterns; @@ -109,18 +108,13 @@ pub fn help_externs( } let output = engine_state - .get_signatures_with_examples(false) - .iter() - .filter(|(signature, _, _)| signature.name == name) - .map(|(signature, examples, cmd_type)| { - get_full_help( - signature, - examples, - engine_state, - stack, - cmd_type == &CommandType::Keyword, - ) + .get_decls_sorted(false) + .into_iter() + .filter_map(|(_, decl_id)| { + let decl = engine_state.get_decl(decl_id); + (decl.name() == name).then_some(decl) }) + .map(|cmd| get_full_help(cmd, engine_state, stack)) .collect::>(); if !output.is_empty() { diff --git a/crates/nu-command/src/help/help_modules.rs b/crates/nu-command/src/help/help_modules.rs index 690968251b..5b39133a6d 100644 --- a/crates/nu-command/src/help/help_modules.rs +++ b/crates/nu-command/src/help/help_modules.rs @@ -149,6 +149,7 @@ pub fn help_modules( if !module.decls.is_empty() || module.main.is_some() { let commands: Vec<(Vec, DeclId)> = engine_state .get_decls_sorted(false) + .into_iter() .filter(|(_, id)| !engine_state.get_decl(*id).is_alias()) .collect(); @@ -186,6 +187,7 @@ pub fn help_modules( if !module.decls.is_empty() { let aliases: Vec<(Vec, DeclId)> = engine_state .get_decls_sorted(false) + .into_iter() .filter(|(_, id)| engine_state.get_decl(*id).is_alias()) .collect(); diff --git a/crates/nu-command/src/math/math_.rs b/crates/nu-command/src/math/math_.rs index 9f230362ea..2ac067af4e 100644 --- a/crates/nu-command/src/math/math_.rs +++ b/crates/nu-command/src/math/math_.rs @@ -29,16 +29,6 @@ impl Command for MathCommand { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &MathCommand.signature(), - &MathCommand.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/network/http/http_.rs b/crates/nu-command/src/network/http/http_.rs index b1e8d64120..361033708e 100644 --- a/crates/nu-command/src/network/http/http_.rs +++ b/crates/nu-command/src/network/http/http_.rs @@ -35,16 +35,6 @@ impl Command for Http { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Http.signature(), - &Http.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/network/url/url_.rs b/crates/nu-command/src/network/url/url_.rs index 49d55c4c6b..9988063d46 100644 --- a/crates/nu-command/src/network/url/url_.rs +++ b/crates/nu-command/src/network/url/url_.rs @@ -33,16 +33,6 @@ impl Command for Url { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Url.signature(), - &Url.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), 
call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/path/path_.rs b/crates/nu-command/src/path/path_.rs index 2d1d0730fa..667008b658 100644 --- a/crates/nu-command/src/path/path_.rs +++ b/crates/nu-command/src/path/path_.rs @@ -42,16 +42,6 @@ the path literal."# call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &PathCommand.signature(), - &PathCommand.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/random/random_.rs b/crates/nu-command/src/random/random_.rs index 16135000ea..5cf14d7748 100644 --- a/crates/nu-command/src/random/random_.rs +++ b/crates/nu-command/src/random/random_.rs @@ -33,16 +33,6 @@ impl Command for RandomCommand { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &RandomCommand.signature(), - &RandomCommand.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/stor/stor_.rs b/crates/nu-command/src/stor/stor_.rs index 3fb6840e7d..c5bb378c2d 100644 --- a/crates/nu-command/src/stor/stor_.rs +++ b/crates/nu-command/src/stor/stor_.rs @@ -29,16 +29,6 @@ impl Command for Stor { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Stor.signature(), - &Stor.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/strings/format/format_.rs b/crates/nu-command/src/strings/format/format_.rs index c51213e4ef..21b46a8b05 100644 --- a/crates/nu-command/src/strings/format/format_.rs +++ b/crates/nu-command/src/strings/format/format_.rs @@ -29,16 +29,6 @@ impl Command for Format { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Format.signature(), - &Format.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/strings/split/command.rs b/crates/nu-command/src/strings/split/command.rs index 0333249c2b..ff9057eacc 100644 --- a/crates/nu-command/src/strings/split/command.rs +++ b/crates/nu-command/src/strings/split/command.rs @@ -29,16 +29,6 @@ impl Command for SplitCommand { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &SplitCommand.signature(), - &SplitCommand.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-command/src/strings/str_/case/str_.rs b/crates/nu-command/src/strings/str_/case/str_.rs index 56e0d1164f..fe6cb86324 100644 --- a/crates/nu-command/src/strings/str_/case/str_.rs +++ b/crates/nu-command/src/strings/str_/case/str_.rs @@ -29,16 +29,6 @@ impl Command for Str { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help( - &Str.signature(), - &Str.examples(), - engine_state, - stack, - self.is_keyword(), - ), - call.head, - ) - 
.into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } } diff --git a/crates/nu-engine/src/documentation.rs b/crates/nu-engine/src/documentation.rs index 62e68eaa6c..3cd1130060 100644 --- a/crates/nu-engine/src/documentation.rs +++ b/crates/nu-engine/src/documentation.rs @@ -2,18 +2,16 @@ use crate::eval_call; use nu_protocol::{ ast::{Argument, Call, Expr, Expression, RecordItem}, debugger::WithoutDebug, - engine::{EngineState, Stack}, + engine::{Command, EngineState, Stack}, record, Category, Example, IntoPipelineData, PipelineData, Signature, Span, SyntaxShape, Type, Value, }; use std::{collections::HashMap, fmt::Write}; pub fn get_full_help( - sig: &Signature, - examples: &[Example], + command: &dyn Command, engine_state: &EngineState, stack: &mut Stack, - is_parser_keyword: bool, ) -> String { let config = engine_state.get_config(); let doc_config = DocumentationConfig { @@ -23,14 +21,15 @@ pub fn get_full_help( }; let stack = &mut stack.start_capture(); + let signature = command.signature().update_from_command(command); get_documentation( - sig, - examples, + &signature, + &command.examples(), engine_state, stack, &doc_config, - is_parser_keyword, + command.is_keyword(), ) } @@ -61,7 +60,6 @@ fn nu_highlight_string(code_string: &str, engine_state: &EngineState, stack: &mu code_string.to_string() } -#[allow(clippy::cognitive_complexity)] fn get_documentation( sig: &Signature, examples: &[Example], diff --git a/crates/nu-engine/src/eval.rs b/crates/nu-engine/src/eval.rs index 02feef3f38..af051a1dc7 100644 --- a/crates/nu-engine/src/eval.rs +++ b/crates/nu-engine/src/eval.rs @@ -27,18 +27,8 @@ pub fn eval_call( let decl = engine_state.get_decl(call.decl_id); if !decl.is_known_external() && call.named_iter().any(|(flag, _, _)| flag.item == "help") { - let mut signature = engine_state.get_signature(decl); - signature.usage = decl.usage().to_string(); - signature.extra_usage = decl.extra_usage().to_string(); - - let full_help = get_full_help( - &signature, - &decl.examples(), - engine_state, - caller_stack, - decl.is_keyword(), - ); - Ok(Value::string(full_help, call.head).into_pipeline_data()) + let help = get_full_help(decl, engine_state, caller_stack); + Ok(Value::string(help, call.head).into_pipeline_data()) } else if let Some(block_id) = decl.block_id() { let block = engine_state.get_block(block_id); diff --git a/crates/nu-plugin-engine/src/context.rs b/crates/nu-plugin-engine/src/context.rs index 0b1d56c050..d5be6ad4b6 100644 --- a/crates/nu-plugin-engine/src/context.rs +++ b/crates/nu-plugin-engine/src/context.rs @@ -139,14 +139,10 @@ impl<'a> PluginExecutionContext for PluginExecutionCommandContext<'a> { fn get_help(&self) -> Result, ShellError> { let decl = self.engine_state.get_decl(self.call.decl_id); - Ok(get_full_help( - &decl.signature(), - &decl.examples(), - &self.engine_state, - &mut self.stack.clone(), - false, + Ok( + get_full_help(decl, &self.engine_state, &mut self.stack.clone()) + .into_spanned(self.call.head), ) - .into_spanned(self.call.head)) } fn get_span_contents(&self, span: Span) -> Result>, ShellError> { diff --git a/crates/nu-protocol/src/engine/engine_state.rs b/crates/nu-protocol/src/engine/engine_state.rs index 1948b67d43..710ca77d4c 100644 --- a/crates/nu-protocol/src/engine/engine_state.rs +++ b/crates/nu-protocol/src/engine/engine_state.rs @@ -7,7 +7,7 @@ use crate::{ Variable, Visibility, DEFAULT_OVERLAY_NAME, }, eval_const::create_nu_constant, - BlockId, Category, Config, DeclId, Example, 
FileId, HistoryConfig, Module, ModuleId, OverlayId, + BlockId, Category, Config, DeclId, FileId, HistoryConfig, Module, ModuleId, OverlayId, ShellError, Signature, Span, Type, Value, VarId, VirtualPathId, }; use fancy_regex::Regex; @@ -766,10 +766,7 @@ impl EngineState { } /// Get all commands within scope, sorted by the commands' names - pub fn get_decls_sorted( - &self, - include_hidden: bool, - ) -> impl Iterator, DeclId)> { + pub fn get_decls_sorted(&self, include_hidden: bool) -> Vec<(Vec, DeclId)> { let mut decls_map = HashMap::new(); for overlay_frame in self.active_overlays(&[]) { @@ -790,7 +787,7 @@ impl EngineState { let mut decls: Vec<(Vec, DeclId)> = decls_map.into_iter().collect(); decls.sort_by(|a, b| a.0.cmp(&b.0)); - decls.into_iter() + decls } pub fn get_signature(&self, decl: &dyn Command) -> Signature { @@ -804,6 +801,7 @@ impl EngineState { /// Get signatures of all commands within scope. pub fn get_signatures(&self, include_hidden: bool) -> Vec { self.get_decls_sorted(include_hidden) + .into_iter() .map(|(_, id)| { let decl = self.get_decl(id); @@ -812,22 +810,6 @@ impl EngineState { .collect() } - /// Get signatures of all commands within scope. - /// - /// In addition to signatures, it returns each command's examples and type. - pub fn get_signatures_with_examples( - &self, - include_hidden: bool, - ) -> Vec<(Signature, Vec, CommandType)> { - self.get_decls_sorted(include_hidden) - .map(|(_, id)| { - let decl = self.get_decl(id); - let signature = self.get_signature(decl).update_from_command(decl); - (signature, decl.examples(), decl.command_type()) - }) - .collect() - } - pub fn get_block(&self, block_id: BlockId) -> &Arc { self.blocks .get(block_id) diff --git a/src/command.rs b/src/command.rs index ab7a74884e..7fcf2da1b3 100644 --- a/src/command.rs +++ b/src/command.rs @@ -191,13 +191,7 @@ pub(crate) fn parse_commandline_args( let help = call.has_flag(engine_state, &mut stack, "help")?; if help { - let full_help = get_full_help( - &Nu.signature(), - &Nu.examples(), - engine_state, - &mut stack, - true, - ); + let full_help = get_full_help(&Nu, engine_state, &mut stack); let _ = std::panic::catch_unwind(move || stdout_write_all_and_flush(full_help)); @@ -245,13 +239,7 @@ pub(crate) fn parse_commandline_args( } // Just give the help and exit if the above fails - let full_help = get_full_help( - &Nu.signature(), - &Nu.examples(), - engine_state, - &mut stack, - true, - ); + let full_help = get_full_help(&Nu, engine_state, &mut stack); print!("{full_help}"); std::process::exit(1); } @@ -452,11 +440,7 @@ impl Command for Nu { call: &Call, _input: PipelineData, ) -> Result { - Ok(Value::string( - get_full_help(&Nu.signature(), &Nu.examples(), engine_state, stack, true), - call.head, - ) - .into_pipeline_data()) + Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) } fn examples(&self) -> Vec { diff --git a/tests/repl/test_engine.rs b/tests/repl/test_engine.rs index c1eb919afa..e452295f42 100644 --- a/tests/repl/test_engine.rs +++ b/tests/repl/test_engine.rs @@ -54,8 +54,7 @@ fn in_and_if_else() -> TestResult { #[test] fn help_works_with_missing_requirements() -> TestResult { - let expected_length = "70"; - run_test(r#"each --help | lines | length"#, expected_length) + run_test(r#"each --help | lines | length"#, "72") } #[test] @@ -65,12 +64,12 @@ fn scope_variable() -> TestResult { "int", ) } + #[rstest] #[case("a", "<> nothing")] #[case("b", "<1.23> float")] #[case("flag1", "<> nothing")] #[case("flag2", "<4.56> float")] - fn 
scope_command_defaults(#[case] var: &str, #[case] exp_result: &str) -> TestResult { run_test( &format!(

From baeba19b22c08e7a4c6cf3f1ff4b37df0c54987a Mon Sep 17 00:00:00 2001
From: Devyn Cairns
Date: Sun, 19 May 2024 17:35:32 -0700
Subject: [PATCH 054/137] Add string/binary type color to `ByteStream` (#12897)

# Description

This PR allows byte streams to optionally be colored as being specifically binary or string data, which guarantees that they'll be converted to `Binary` or `String` appropriately on `into_value()`, making them compatible with `Type` guarantees. This makes them significantly more broadly usable for command input and output.

There is still an `Unknown` type for byte streams coming from external commands; it keeps the previous behavior, where the stream is treated as a string if it turns out to be valid UTF-8.

A small number of commands were updated to take advantage of this, just to prove the point. I will be adding more after this merges.

# User-Facing Changes

- New types in `describe`: `string (stream)`, `binary (stream)`
- These commands now return a stream if their input was a stream:
  - `into binary`
  - `into string`
  - `bytes collect`
  - `str join`
  - `first` (binary)
  - `last` (binary)
  - `take` (binary)
  - `skip` (binary)
- Streams that are explicitly binary colored will print as a streaming hexdump
  - example:
    ```nushell
    1.. | each { into binary } | bytes collect
    ```

# Tests + Formatting

I've added some tests to cover it at a basic level, and it doesn't break anything existing, but I do think more would be nice. Some of those will come when I modify more commands to stream.

# After Submitting

There are a few things I'm not quite satisfied with:

- **String trimming behavior.** We automatically trim newlines from streams from external commands, but I don't think we should do this with internal commands. If I call a command that happens to turn my string into a stream, I don't want the newline to suddenly disappear. I changed this to specifically do it only on `Child` and `File`, but I don't know if this is quite right, and maybe we should bring back the old flag for `trim_end_newline`.
- **Known binary always resulting in a hexdump.** It would be nice to have a `print --raw`, so that we can put binary data on stdout explicitly if we want to. This PR doesn't change how external commands work though - they still dump straight to stdout.
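To make the typing above concrete from the Rust side, here is a rough sketch using only the `ByteStream` APIs touched in this PR (the import paths and the exact bounds on the reader argument are assumptions here, so treat it as illustrative rather than exact):

```rust
use nu_protocol::{ByteStream, ByteStreamType, Span};

fn demo() {
    let span = Span::new(0, 0);

    // A reader-backed stream explicitly colored as binary: it collects into a
    // Value::Binary and prints as a streaming hexdump.
    let bytes: &[u8] = b"\x01\x23\x45\x67";
    let binary_stream = ByteStream::read(bytes, span, None, ByteStreamType::Binary);
    assert!(binary_stream.type_() == ByteStreamType::Binary);

    // Re-coloring a stream, as `into string` now does, only changes the declared
    // type; the data itself is not copied or validated at this point.
    let string_stream = binary_stream.with_type(ByteStreamType::String);
    assert!(string_stream.type_() == ByteStreamType::String);
}
```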
Otherwise, here's the normal checklist: - [ ] release notes - [ ] docs update for plugin protocol changes (added `type` field) --------- Co-authored-by: Ian Manske --- crates/nu-cli/src/util.rs | 4 +- .../nu-cmd-lang/src/core_commands/describe.rs | 6 +- crates/nu-command/src/bytes/collect.rs | 58 +- .../nu-command/src/conversions/into/binary.rs | 11 +- .../src/conversions/into/cell_path.rs | 2 +- .../nu-command/src/conversions/into/string.rs | 20 +- crates/nu-command/src/filters/drop/column.rs | 2 +- crates/nu-command/src/filters/first.rs | 43 +- crates/nu-command/src/filters/insert.rs | 4 +- crates/nu-command/src/filters/items.rs | 2 +- crates/nu-command/src/filters/last.rs | 48 +- crates/nu-command/src/filters/skip/skip_.rs | 36 +- crates/nu-command/src/filters/take/take_.rs | 32 +- crates/nu-command/src/filters/tee.rs | 229 +++---- crates/nu-command/src/filters/update.rs | 4 +- crates/nu-command/src/filters/upsert.rs | 4 +- crates/nu-command/src/filters/values.rs | 2 +- crates/nu-command/src/formats/to/text.rs | 7 +- crates/nu-command/src/network/http/client.rs | 12 +- crates/nu-command/src/strings/str_/join.rs | 50 +- crates/nu-command/src/system/run_external.rs | 1 + crates/nu-command/src/viewers/table.rs | 86 ++- .../tests/commands/bytes/collect.rs | 27 + crates/nu-command/tests/commands/bytes/mod.rs | 1 + crates/nu-command/tests/commands/first.rs | 14 + crates/nu-command/tests/commands/last.rs | 14 + crates/nu-command/tests/commands/mod.rs | 1 + .../nu-command/tests/commands/skip/skip_.rs | 16 +- .../commands/str_/{collect.rs => join.rs} | 12 + crates/nu-command/tests/commands/str_/mod.rs | 2 +- crates/nu-command/tests/commands/take/rows.rs | 14 + crates/nu-engine/src/command_prelude.rs | 6 +- crates/nu-plugin-core/src/interface/mod.rs | 5 +- crates/nu-plugin-core/src/interface/tests.rs | 12 +- .../nu-plugin-engine/src/interface/tests.rs | 6 +- crates/nu-plugin-protocol/src/lib.rs | 6 +- .../nu-plugin/src/plugin/interface/tests.rs | 5 +- crates/nu-pretty-hex/src/pretty_hex.rs | 50 +- crates/nu-protocol/src/errors/shell_error.rs | 10 +- .../nu-protocol/src/pipeline/byte_stream.rs | 604 ++++++++++++++---- .../nu-protocol/src/pipeline/pipeline_data.rs | 51 +- .../src/commands/collect_bytes.rs | 4 +- 42 files changed, 1107 insertions(+), 416 deletions(-) create mode 100644 crates/nu-command/tests/commands/bytes/collect.rs create mode 100644 crates/nu-command/tests/commands/bytes/mod.rs rename crates/nu-command/tests/commands/str_/{collect.rs => join.rs} (65%) diff --git a/crates/nu-cli/src/util.rs b/crates/nu-cli/src/util.rs index 7ebea0deb2..e4912e012f 100644 --- a/crates/nu-cli/src/util.rs +++ b/crates/nu-cli/src/util.rs @@ -276,8 +276,8 @@ fn evaluate_source( eval_block::(engine_state, stack, &block, input) }?; - let status = if let PipelineData::ByteStream(stream, ..) = pipeline { - stream.print(false)? + let status = if let PipelineData::ByteStream(..) = pipeline { + pipeline.print(engine_state, stack, false, false)? } else { if let Some(hook) = engine_state.get_config().hooks.display_output.clone() { let pipeline = eval_hook( diff --git a/crates/nu-cmd-lang/src/core_commands/describe.rs b/crates/nu-cmd-lang/src/core_commands/describe.rs index 7d6d7f6f83..3d992f3f33 100644 --- a/crates/nu-cmd-lang/src/core_commands/describe.rs +++ b/crates/nu-cmd-lang/src/core_commands/describe.rs @@ -163,6 +163,8 @@ fn run( let description = match input { PipelineData::ByteStream(stream, ..) 
=> { + let type_ = stream.type_().describe(); + let description = if options.detailed { let origin = match stream.source() { ByteStreamSource::Read(_) => "unknown", @@ -172,14 +174,14 @@ fn run( Value::record( record! { - "type" => Value::string("byte stream", head), + "type" => Value::string(type_, head), "origin" => Value::string(origin, head), "metadata" => metadata_to_value(metadata, head), }, head, ) } else { - Value::string("byte stream", head) + Value::string(type_, head) }; if !options.no_collect { diff --git a/crates/nu-command/src/bytes/collect.rs b/crates/nu-command/src/bytes/collect.rs index 9cd34496e4..74ea3e5d14 100644 --- a/crates/nu-command/src/bytes/collect.rs +++ b/crates/nu-command/src/bytes/collect.rs @@ -1,3 +1,4 @@ +use itertools::Itertools; use nu_engine::command_prelude::*; #[derive(Clone, Copy)] @@ -35,46 +36,33 @@ impl Command for BytesCollect { input: PipelineData, ) -> Result { let separator: Option> = call.opt(engine_state, stack, 0)?; + + let span = call.head; + // input should be a list of binary data. - let mut output_binary = vec![]; - for value in input { - match value { - Value::Binary { mut val, .. } => { - output_binary.append(&mut val); - // manually concat - // TODO: make use of std::slice::Join when it's available in stable. - if let Some(sep) = &separator { - let mut work_sep = sep.clone(); - output_binary.append(&mut work_sep) - } - } - // Explicitly propagate errors instead of dropping them. - Value::Error { error, .. } => return Err(*error), - other => { - return Err(ShellError::OnlySupportsThisInputType { + let metadata = input.metadata(); + let iter = Itertools::intersperse( + input.into_iter_strict(span)?.map(move |value| { + // Everything is wrapped in Some in case there's a separator, so we can flatten + Some(match value { + // Explicitly propagate errors instead of dropping them. + Value::Error { error, .. } => Err(*error), + Value::Binary { val, .. } => Ok(val), + other => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "binary".into(), wrong_type: other.get_type().to_string(), - dst_span: call.head, + dst_span: span, src_span: other.span(), - }); - } - } - } + }), + }) + }), + Ok(separator).transpose(), + ) + .flatten(); - match separator { - None => Ok(Value::binary(output_binary, call.head).into_pipeline_data()), - Some(sep) => { - if output_binary.is_empty() { - Ok(Value::binary(output_binary, call.head).into_pipeline_data()) - } else { - // have push one extra separator in previous step, pop them out. - for _ in sep { - let _ = output_binary.pop(); - } - Ok(Value::binary(output_binary, call.head).into_pipeline_data()) - } - } - } + let output = ByteStream::from_result_iter(iter, span, None, ByteStreamType::Binary); + + Ok(PipelineData::ByteStream(output, metadata)) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/conversions/into/binary.rs b/crates/nu-command/src/conversions/into/binary.rs index 479b0fc7d7..8eb7715754 100644 --- a/crates/nu-command/src/conversions/into/binary.rs +++ b/crates/nu-command/src/conversions/into/binary.rs @@ -127,15 +127,18 @@ fn into_binary( let cell_paths = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - if let PipelineData::ByteStream(stream, ..) 
= input { - // TODO: in the future, we may want this to stream out, converting each to bytes - Ok(Value::binary(stream.into_bytes()?, head).into_pipeline_data()) + if let PipelineData::ByteStream(stream, metadata) = input { + // Just set the type - that should be good enough + Ok(PipelineData::ByteStream( + stream.with_type(ByteStreamType::Binary), + metadata, + )) } else { let args = Arguments { cell_paths, compact: call.has_flag(engine_state, stack, "compact")?, }; - operate(action, args, input, call.head, engine_state.ctrlc.clone()) + operate(action, args, input, head, engine_state.ctrlc.clone()) } } diff --git a/crates/nu-command/src/conversions/into/cell_path.rs b/crates/nu-command/src/conversions/into/cell_path.rs index 6da317abd3..c05dad57ba 100644 --- a/crates/nu-command/src/conversions/into/cell_path.rs +++ b/crates/nu-command/src/conversions/into/cell_path.rs @@ -103,7 +103,7 @@ fn into_cell_path(call: &Call, input: PipelineData) -> Result Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, int".into(), - wrong_type: "byte stream".into(), + wrong_type: stream.type_().describe().into(), dst_span: head, src_span: stream.span(), }), diff --git a/crates/nu-command/src/conversions/into/string.rs b/crates/nu-command/src/conversions/into/string.rs index eda4f7e5a5..c0731b2e20 100644 --- a/crates/nu-command/src/conversions/into/string.rs +++ b/crates/nu-command/src/conversions/into/string.rs @@ -156,9 +156,23 @@ fn string_helper( let cell_paths = call.rest(engine_state, stack, 0)?; let cell_paths = (!cell_paths.is_empty()).then_some(cell_paths); - if let PipelineData::ByteStream(stream, ..) = input { - // TODO: in the future, we may want this to stream out, converting each to bytes - Ok(Value::string(stream.into_string()?, head).into_pipeline_data()) + if let PipelineData::ByteStream(stream, metadata) = input { + // Just set the type - that should be good enough. There is no guarantee that the data + // within a string stream is actually valid UTF-8. But refuse to do it if it was already set + // to binary + if stream.type_() != ByteStreamType::Binary { + Ok(PipelineData::ByteStream( + stream.with_type(ByteStreamType::String), + metadata, + )) + } else { + Err(ShellError::CantConvert { + to_type: "string".into(), + from_type: "binary".into(), + span: stream.span(), + help: Some("try using the `decode` command".into()), + }) + } } else { let config = engine_state.get_config().clone(); let args = Arguments { diff --git a/crates/nu-command/src/filters/drop/column.rs b/crates/nu-command/src/filters/drop/column.rs index 01c13deee4..94c0308ea8 100644 --- a/crates/nu-command/src/filters/drop/column.rs +++ b/crates/nu-command/src/filters/drop/column.rs @@ -135,7 +135,7 @@ fn drop_cols( PipelineData::Empty => Ok(PipelineData::Empty), PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "table or record".into(), - wrong_type: "byte stream".into(), + wrong_type: stream.type_().describe().into(), dst_span: head, src_span: stream.span(), }), diff --git a/crates/nu-command/src/filters/first.rs b/crates/nu-command/src/filters/first.rs index e581c3e84d..f625847a3f 100644 --- a/crates/nu-command/src/filters/first.rs +++ b/crates/nu-command/src/filters/first.rs @@ -170,12 +170,43 @@ fn first_helper( )) } } - PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary or range".into(), - wrong_type: "byte stream".into(), - dst_span: head, - src_span: stream.span(), - }), + PipelineData::ByteStream(stream, metadata) => { + if stream.type_() == ByteStreamType::Binary { + let span = stream.span(); + if let Some(mut reader) = stream.reader() { + use std::io::Read; + if return_single_element { + // Take a single byte + let mut byte = [0u8]; + if reader.read(&mut byte).err_span(span)? > 0 { + Ok(Value::int(byte[0] as i64, head).into_pipeline_data()) + } else { + Err(ShellError::AccessEmptyContent { span: head }) + } + } else { + // Just take 'rows' bytes off the stream, mimicking the binary behavior + Ok(PipelineData::ByteStream( + ByteStream::read( + reader.take(rows as u64), + head, + None, + ByteStreamType::Binary, + ), + metadata, + )) + } + } else { + Ok(PipelineData::Empty) + } + } else { + Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary or range".into(), + wrong_type: stream.type_().describe().into(), + dst_span: head, + src_span: stream.span(), + }) + } + } PipelineData::Empty => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), wrong_type: "null".into(), diff --git a/crates/nu-command/src/filters/insert.rs b/crates/nu-command/src/filters/insert.rs index e8794304c8..5f1380b2ac 100644 --- a/crates/nu-command/src/filters/insert.rs +++ b/crates/nu-command/src/filters/insert.rs @@ -261,8 +261,8 @@ fn insert( type_name: "empty pipeline".to_string(), span: head, }), - PipelineData::ByteStream(..) => Err(ShellError::IncompatiblePathAccess { - type_name: "byte stream".to_string(), + PipelineData::ByteStream(stream, ..) => Err(ShellError::IncompatiblePathAccess { + type_name: stream.type_().describe().into(), span: head, }), } diff --git a/crates/nu-command/src/filters/items.rs b/crates/nu-command/src/filters/items.rs index 6afc0bc536..ed30486bee 100644 --- a/crates/nu-command/src/filters/items.rs +++ b/crates/nu-command/src/filters/items.rs @@ -86,7 +86,7 @@ impl Command for Items { }), PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "record".into(), - wrong_type: "byte stream".into(), + wrong_type: stream.type_().describe().into(), dst_span: call.head, src_span: stream.span(), }), diff --git a/crates/nu-command/src/filters/last.rs b/crates/nu-command/src/filters/last.rs index 7530126c26..510e6457a8 100644 --- a/crates/nu-command/src/filters/last.rs +++ b/crates/nu-command/src/filters/last.rs @@ -160,12 +160,48 @@ impl Command for Last { }), } } - PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary or range".into(), - wrong_type: "byte stream".into(), - dst_span: head, - src_span: stream.span(), - }), + PipelineData::ByteStream(stream, ..) => { + if stream.type_() == ByteStreamType::Binary { + let span = stream.span(); + if let Some(mut reader) = stream.reader() { + use std::io::Read; + // Have to be a bit tricky here, but just consume into a VecDeque that we + // shrink to fit each time + const TAKE: u64 = 8192; + let mut buf = VecDeque::with_capacity(rows + TAKE as usize); + loop { + let taken = std::io::copy(&mut (&mut reader).take(TAKE), &mut buf) + .err_span(span)?; + if buf.len() > rows { + buf.drain(..(buf.len() - rows)); + } + if taken < TAKE { + // This must be EOF. 
+ if return_single_element { + if !buf.is_empty() { + return Ok( + Value::int(buf[0] as i64, head).into_pipeline_data() + ); + } else { + return Err(ShellError::AccessEmptyContent { span: head }); + } + } else { + return Ok(Value::binary(buf, head).into_pipeline_data()); + } + } + } + } else { + Ok(PipelineData::Empty) + } + } else { + Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary or range".into(), + wrong_type: stream.type_().describe().into(), + dst_span: head, + src_span: stream.span(), + }) + } + } PipelineData::Empty => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), wrong_type: "null".into(), diff --git a/crates/nu-command/src/filters/skip/skip_.rs b/crates/nu-command/src/filters/skip/skip_.rs index 9048b34a58..df53cfacba 100644 --- a/crates/nu-command/src/filters/skip/skip_.rs +++ b/crates/nu-command/src/filters/skip/skip_.rs @@ -12,6 +12,7 @@ impl Command for Skip { Signature::build(self.name()) .input_output_types(vec![ (Type::table(), Type::table()), + (Type::Binary, Type::Binary), ( Type::List(Box::new(Type::Any)), Type::List(Box::new(Type::Any)), @@ -51,6 +52,11 @@ impl Command for Skip { "editions" => Value::test_int(2021), })])), }, + Example { + description: "Skip 2 bytes of a binary value", + example: "0x[01 23 45 67] | skip 2", + result: Some(Value::test_binary(vec![0x45, 0x67])), + }, ] } fn run( @@ -87,12 +93,30 @@ impl Command for Skip { let ctrlc = engine_state.ctrlc.clone(); let input_span = input.span().unwrap_or(call.head); match input { - PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary or range".into(), - wrong_type: "byte stream".into(), - dst_span: call.head, - src_span: stream.span(), - }), + PipelineData::ByteStream(stream, metadata) => { + if stream.type_() == ByteStreamType::Binary { + let span = stream.span(); + if let Some(mut reader) = stream.reader() { + use std::io::Read; + // Copy the number of skipped bytes into the sink before proceeding + std::io::copy(&mut (&mut reader).take(n as u64), &mut std::io::sink()) + .err_span(span)?; + Ok(PipelineData::ByteStream( + ByteStream::read(reader, call.head, None, ByteStreamType::Binary), + metadata, + )) + } else { + Ok(PipelineData::Empty) + } + } else { + Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary or range".into(), + wrong_type: stream.type_().describe().into(), + dst_span: call.head, + src_span: stream.span(), + }) + } + } PipelineData::Value(Value::Binary { val, .. }, metadata) => { let bytes = val.into_iter().skip(n).collect::>(); Ok(Value::binary(bytes, input_span).into_pipeline_data_with_metadata(metadata)) diff --git a/crates/nu-command/src/filters/take/take_.rs b/crates/nu-command/src/filters/take/take_.rs index 12840aa8d6..d4bf455c4a 100644 --- a/crates/nu-command/src/filters/take/take_.rs +++ b/crates/nu-command/src/filters/take/take_.rs @@ -78,12 +78,32 @@ impl Command for Take { stream.modify(|iter| iter.take(rows_desired)), metadata, )), - PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::OnlySupportsThisInputType { - exp_input_type: "list, binary or range".into(), - wrong_type: "byte stream".into(), - dst_span: head, - src_span: stream.span(), - }), + PipelineData::ByteStream(stream, metadata) => { + if stream.type_() == ByteStreamType::Binary { + if let Some(reader) = stream.reader() { + use std::io::Read; + // Just take 'rows' bytes off the stream, mimicking the binary behavior + Ok(PipelineData::ByteStream( + ByteStream::read( + reader.take(rows_desired as u64), + head, + None, + ByteStreamType::Binary, + ), + metadata, + )) + } else { + Ok(PipelineData::Empty) + } + } else { + Err(ShellError::OnlySupportsThisInputType { + exp_input_type: "list, binary or range".into(), + wrong_type: stream.type_().describe().into(), + dst_span: head, + src_span: stream.span(), + }) + } + } PipelineData::Empty => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "list, binary or range".into(), wrong_type: "null".into(), diff --git a/crates/nu-command/src/filters/tee.rs b/crates/nu-command/src/filters/tee.rs index 936dee5c79..d6decd3bc6 100644 --- a/crates/nu-command/src/filters/tee.rs +++ b/crates/nu-command/src/filters/tee.rs @@ -1,7 +1,7 @@ use nu_engine::{command_prelude::*, get_eval_block_with_early_return}; use nu_protocol::{ byte_stream::copy_with_interrupt, engine::Closure, process::ChildPipe, ByteStream, - ByteStreamSource, OutDest, + ByteStreamSource, OutDest, PipelineMetadata, }; use std::{ io::{self, Read, Write}, @@ -104,9 +104,13 @@ use it in your pipeline."# if let PipelineData::ByteStream(stream, metadata) = input { let span = stream.span(); let ctrlc = engine_state.ctrlc.clone(); - let eval_block = { - let metadata = metadata.clone(); - move |stream| eval_block(PipelineData::ByteStream(stream, metadata)) + let type_ = stream.type_(); + + let info = StreamInfo { + span, + ctrlc: ctrlc.clone(), + type_, + metadata: metadata.clone(), }; match stream.into_source() { @@ -115,10 +119,11 @@ use it in your pipeline."# return stderr_misuse(span, head); } - let tee = IoTee::new(read, span, eval_block)?; + let tee_thread = spawn_tee(info, eval_block)?; + let tee = IoTee::new(read, tee_thread); Ok(PipelineData::ByteStream( - ByteStream::read(tee, span, ctrlc), + ByteStream::read(tee, span, ctrlc, type_), metadata, )) } @@ -127,44 +132,32 @@ use it in your pipeline."# return stderr_misuse(span, head); } - let tee = IoTee::new(file, span, eval_block)?; + let tee_thread = spawn_tee(info, eval_block)?; + let tee = IoTee::new(file, tee_thread); Ok(PipelineData::ByteStream( - ByteStream::read(tee, span, ctrlc), + ByteStream::read(tee, span, ctrlc, type_), metadata, )) } ByteStreamSource::Child(mut child) => { let stderr_thread = if use_stderr { let stderr_thread = if let Some(stderr) = child.stderr.take() { + let tee_thread = spawn_tee(info.clone(), eval_block)?; + let tee = IoTee::new(stderr, tee_thread); match stack.stderr() { OutDest::Pipe | OutDest::Capture => { - let tee = IoTee::new(stderr, span, eval_block)?; child.stderr = Some(ChildPipe::Tee(Box::new(tee))); - None + Ok(None) } - OutDest::Null => Some(tee_pipe_on_thread( - stderr, - io::sink(), - span, - ctrlc.as_ref(), - eval_block, - )?), - OutDest::Inherit => Some(tee_pipe_on_thread( - stderr, - io::stderr(), - span, - ctrlc.as_ref(), - eval_block, - )?), - OutDest::File(file) => Some(tee_pipe_on_thread( - stderr, - file.clone(), - span, - ctrlc.as_ref(), - eval_block, - )?), - } + OutDest::Null => copy_on_thread(tee, io::sink(), &info).map(Some), + OutDest::Inherit => { + copy_on_thread(tee, 
io::stderr(), &info).map(Some) + } + OutDest::File(file) => { + copy_on_thread(tee, file.clone(), &info).map(Some) + } + }? } else { None }; @@ -175,37 +168,29 @@ use it in your pipeline."# child.stdout = Some(stdout); Ok(()) } - OutDest::Null => { - copy_pipe(stdout, io::sink(), span, ctrlc.as_deref()) - } - OutDest::Inherit => { - copy_pipe(stdout, io::stdout(), span, ctrlc.as_deref()) - } - OutDest::File(file) => { - copy_pipe(stdout, file.as_ref(), span, ctrlc.as_deref()) - } + OutDest::Null => copy_pipe(stdout, io::sink(), &info), + OutDest::Inherit => copy_pipe(stdout, io::stdout(), &info), + OutDest::File(file) => copy_pipe(stdout, file.as_ref(), &info), }?; } stderr_thread } else { let stderr_thread = if let Some(stderr) = child.stderr.take() { + let info = info.clone(); match stack.stderr() { OutDest::Pipe | OutDest::Capture => { child.stderr = Some(stderr); Ok(None) } OutDest::Null => { - copy_pipe_on_thread(stderr, io::sink(), span, ctrlc.as_ref()) - .map(Some) + copy_pipe_on_thread(stderr, io::sink(), &info).map(Some) } OutDest::Inherit => { - copy_pipe_on_thread(stderr, io::stderr(), span, ctrlc.as_ref()) - .map(Some) + copy_pipe_on_thread(stderr, io::stderr(), &info).map(Some) } OutDest::File(file) => { - copy_pipe_on_thread(stderr, file.clone(), span, ctrlc.as_ref()) - .map(Some) + copy_pipe_on_thread(stderr, file.clone(), &info).map(Some) } }? } else { @@ -213,29 +198,16 @@ use it in your pipeline."# }; if let Some(stdout) = child.stdout.take() { + let tee_thread = spawn_tee(info.clone(), eval_block)?; + let tee = IoTee::new(stdout, tee_thread); match stack.stdout() { OutDest::Pipe | OutDest::Capture => { - let tee = IoTee::new(stdout, span, eval_block)?; child.stdout = Some(ChildPipe::Tee(Box::new(tee))); Ok(()) } - OutDest::Null => { - tee_pipe(stdout, io::sink(), span, ctrlc.as_deref(), eval_block) - } - OutDest::Inherit => tee_pipe( - stdout, - io::stdout(), - span, - ctrlc.as_deref(), - eval_block, - ), - OutDest::File(file) => tee_pipe( - stdout, - file.as_ref(), - span, - ctrlc.as_deref(), - eval_block, - ), + OutDest::Null => copy(tee, io::sink(), &info), + OutDest::Inherit => copy(tee, io::stdout(), &info), + OutDest::File(file) => copy(tee, file.as_ref(), &info), }?; } @@ -350,7 +322,7 @@ where fn stderr_misuse(span: Span, head: Span) -> Result { Err(ShellError::UnsupportedInput { msg: "--stderr can only be used on external commands".into(), - input: "the input to `tee` is not an external commands".into(), + input: "the input to `tee` is not an external command".into(), msg_span: head, input_span: span, }) @@ -363,23 +335,12 @@ struct IoTee { } impl IoTee { - fn new( - reader: R, - span: Span, - eval_block: impl FnOnce(ByteStream) -> Result<(), ShellError> + Send + 'static, - ) -> Result { - let (sender, receiver) = mpsc::channel(); - - let thread = thread::Builder::new() - .name("tee".into()) - .spawn(move || eval_block(ByteStream::from_iter(receiver, span, None))) - .err_span(span)?; - - Ok(Self { + fn new(reader: R, tee: TeeThread) -> Self { + Self { reader, - sender: Some(sender), - thread: Some(thread), - }) + sender: Some(tee.sender), + thread: Some(tee.thread), + } } } @@ -411,68 +372,74 @@ impl Read for IoTee { } } -fn tee_pipe( - pipe: ChildPipe, - mut dest: impl Write, +struct TeeThread { + sender: Sender>, + thread: JoinHandle>, +} + +fn spawn_tee( + info: StreamInfo, + mut eval_block: impl FnMut(PipelineData) -> Result<(), ShellError> + Send + 'static, +) -> Result { + let (sender, receiver) = mpsc::channel(); + + let thread = thread::Builder::new() + 
.name("tee".into()) + .spawn(move || { + // We don't use ctrlc here because we assume it already has it on the other side + let stream = ByteStream::from_iter(receiver.into_iter(), info.span, None, info.type_); + eval_block(PipelineData::ByteStream(stream, info.metadata)) + }) + .err_span(info.span)?; + + Ok(TeeThread { sender, thread }) +} + +#[derive(Clone)] +struct StreamInfo { span: Span, - ctrlc: Option<&AtomicBool>, - eval_block: impl FnOnce(ByteStream) -> Result<(), ShellError> + Send + 'static, -) -> Result<(), ShellError> { - match pipe { - ChildPipe::Pipe(pipe) => { - let mut tee = IoTee::new(pipe, span, eval_block)?; - copy_with_interrupt(&mut tee, &mut dest, span, ctrlc)?; - } - ChildPipe::Tee(tee) => { - let mut tee = IoTee::new(tee, span, eval_block)?; - copy_with_interrupt(&mut tee, &mut dest, span, ctrlc)?; - } - } + ctrlc: Option>, + type_: ByteStreamType, + metadata: Option, +} + +fn copy(mut src: impl Read, mut dest: impl Write, info: &StreamInfo) -> Result<(), ShellError> { + copy_with_interrupt(&mut src, &mut dest, info.span, info.ctrlc.as_deref())?; Ok(()) } -fn tee_pipe_on_thread( - pipe: ChildPipe, - dest: impl Write + Send + 'static, - span: Span, - ctrlc: Option<&Arc>, - eval_block: impl FnOnce(ByteStream) -> Result<(), ShellError> + Send + 'static, +fn copy_pipe(pipe: ChildPipe, dest: impl Write, info: &StreamInfo) -> Result<(), ShellError> { + match pipe { + ChildPipe::Pipe(pipe) => copy(pipe, dest, info), + ChildPipe::Tee(tee) => copy(tee, dest, info), + } +} + +fn copy_on_thread( + mut src: impl Read + Send + 'static, + mut dest: impl Write + Send + 'static, + info: &StreamInfo, ) -> Result>, ShellError> { - let ctrlc = ctrlc.cloned(); + let span = info.span; + let ctrlc = info.ctrlc.clone(); thread::Builder::new() - .name("stderr tee".into()) - .spawn(move || tee_pipe(pipe, dest, span, ctrlc.as_deref(), eval_block)) + .name("stderr copier".into()) + .spawn(move || { + copy_with_interrupt(&mut src, &mut dest, span, ctrlc.as_deref())?; + Ok(()) + }) .map_err(|e| e.into_spanned(span).into()) } -fn copy_pipe( - pipe: ChildPipe, - mut dest: impl Write, - span: Span, - ctrlc: Option<&AtomicBool>, -) -> Result<(), ShellError> { - match pipe { - ChildPipe::Pipe(mut pipe) => { - copy_with_interrupt(&mut pipe, &mut dest, span, ctrlc)?; - } - ChildPipe::Tee(mut tee) => { - copy_with_interrupt(&mut tee, &mut dest, span, ctrlc)?; - } - } - Ok(()) -} - fn copy_pipe_on_thread( pipe: ChildPipe, dest: impl Write + Send + 'static, - span: Span, - ctrlc: Option<&Arc>, + info: &StreamInfo, ) -> Result>, ShellError> { - let ctrlc = ctrlc.cloned(); - thread::Builder::new() - .name("stderr copier".into()) - .spawn(move || copy_pipe(pipe, dest, span, ctrlc.as_deref())) - .map_err(|e| e.into_spanned(span).into()) + match pipe { + ChildPipe::Pipe(pipe) => copy_on_thread(pipe, dest, info), + ChildPipe::Tee(tee) => copy_on_thread(tee, dest, info), + } } #[test] diff --git a/crates/nu-command/src/filters/update.rs b/crates/nu-command/src/filters/update.rs index 0d914d2d8e..e724ae77ad 100644 --- a/crates/nu-command/src/filters/update.rs +++ b/crates/nu-command/src/filters/update.rs @@ -225,8 +225,8 @@ fn update( type_name: "empty pipeline".to_string(), span: head, }), - PipelineData::ByteStream(..) => Err(ShellError::IncompatiblePathAccess { - type_name: "byte stream".to_string(), + PipelineData::ByteStream(stream, ..) 
=> Err(ShellError::IncompatiblePathAccess { + type_name: stream.type_().describe().into(), span: head, }), } diff --git a/crates/nu-command/src/filters/upsert.rs b/crates/nu-command/src/filters/upsert.rs index 4313addd89..e3678972fb 100644 --- a/crates/nu-command/src/filters/upsert.rs +++ b/crates/nu-command/src/filters/upsert.rs @@ -285,8 +285,8 @@ fn upsert( type_name: "empty pipeline".to_string(), span: head, }), - PipelineData::ByteStream(..) => Err(ShellError::IncompatiblePathAccess { - type_name: "byte stream".to_string(), + PipelineData::ByteStream(stream, ..) => Err(ShellError::IncompatiblePathAccess { + type_name: stream.type_().describe().into(), span: head, }), } diff --git a/crates/nu-command/src/filters/values.rs b/crates/nu-command/src/filters/values.rs index ed33ebf643..f6ff8cda2e 100644 --- a/crates/nu-command/src/filters/values.rs +++ b/crates/nu-command/src/filters/values.rs @@ -182,7 +182,7 @@ fn values( } PipelineData::ByteStream(stream, ..) => Err(ShellError::OnlySupportsThisInputType { exp_input_type: "record or table".into(), - wrong_type: "byte stream".into(), + wrong_type: stream.type_().describe().into(), dst_span: head, src_span: stream.span(), }), diff --git a/crates/nu-command/src/formats/to/text.rs b/crates/nu-command/src/formats/to/text.rs index 7f1d632c13..fb240654f6 100644 --- a/crates/nu-command/src/formats/to/text.rs +++ b/crates/nu-command/src/formats/to/text.rs @@ -51,7 +51,12 @@ impl Command for ToText { str }); Ok(PipelineData::ByteStream( - ByteStream::from_iter(iter, span, engine_state.ctrlc.clone()), + ByteStream::from_iter( + iter, + span, + engine_state.ctrlc.clone(), + ByteStreamType::String, + ), meta, )) } diff --git a/crates/nu-command/src/network/http/client.rs b/crates/nu-command/src/network/http/client.rs index 54f7749627..8317fb50bc 100644 --- a/crates/nu-command/src/network/http/client.rs +++ b/crates/nu-command/src/network/http/client.rs @@ -117,10 +117,20 @@ pub fn response_to_buffer( _ => None, }; + // Try to guess whether the response is definitely intended to binary or definitely intended to + // be UTF-8 text. Otherwise specify `None` and just guess. This doesn't have to be thorough. + let content_type_lowercase = response.header("content-type").map(|s| s.to_lowercase()); + let response_type = match content_type_lowercase.as_deref() { + Some("application/octet-stream") => ByteStreamType::Binary, + Some(h) if h.contains("charset=utf-8") => ByteStreamType::String, + _ => ByteStreamType::Unknown, + }; + let reader = response.into_reader(); PipelineData::ByteStream( - ByteStream::read(reader, span, engine_state.ctrlc.clone()).with_known_size(buffer_size), + ByteStream::read(reader, span, engine_state.ctrlc.clone(), response_type) + .with_known_size(buffer_size), None, ) } diff --git a/crates/nu-command/src/strings/str_/join.rs b/crates/nu-command/src/strings/str_/join.rs index 732434b20f..dd3a87dd61 100644 --- a/crates/nu-command/src/strings/str_/join.rs +++ b/crates/nu-command/src/strings/str_/join.rs @@ -1,4 +1,5 @@ use nu_engine::command_prelude::*; +use std::io::Write; #[derive(Clone)] pub struct StrJoin; @@ -40,31 +41,40 @@ impl Command for StrJoin { ) -> Result { let separator: Option = call.opt(engine_state, stack, 0)?; - let config = engine_state.get_config(); + let config = engine_state.config.clone(); - // let output = input.collect_string(&separator.unwrap_or_default(), &config)?; - // Hmm, not sure what we actually want. - // `to_formatted_string` formats dates as human readable which feels funny. 
- let mut strings: Vec = vec![]; + let span = call.head; - for value in input { - let str = match value { - Value::Error { error, .. } => { - return Err(*error); + let metadata = input.metadata(); + let mut iter = input.into_iter(); + let mut first = true; + + let output = ByteStream::from_fn(span, None, ByteStreamType::String, move |buffer| { + // Write each input to the buffer + if let Some(value) = iter.next() { + // Write the separator if this is not the first + if first { + first = false; + } else if let Some(separator) = &separator { + write!(buffer, "{}", separator)?; } - Value::Date { val, .. } => format!("{val:?}"), - value => value.to_expanded_string("\n", config), - }; - strings.push(str); - } - let output = if let Some(separator) = separator { - strings.join(&separator) - } else { - strings.join("") - }; + match value { + Value::Error { error, .. } => { + return Err(*error); + } + // Hmm, not sure what we actually want. + // `to_expanded_string` formats dates as human readable which feels funny. + Value::Date { val, .. } => write!(buffer, "{val:?}")?, + value => write!(buffer, "{}", value.to_expanded_string("\n", &config))?, + } + Ok(true) + } else { + Ok(false) + } + }); - Ok(Value::string(output, call.head).into_pipeline_data()) + Ok(PipelineData::ByteStream(output, metadata)) } fn examples(&self) -> Vec { diff --git a/crates/nu-command/src/system/run_external.rs b/crates/nu-command/src/system/run_external.rs index b12b89263c..b37d3a2fcb 100644 --- a/crates/nu-command/src/system/run_external.rs +++ b/crates/nu-command/src/system/run_external.rs @@ -416,6 +416,7 @@ impl ExternalCommand { .name("external stdin worker".to_string()) .spawn(move || { let input = match input { + // Don't touch binary input or byte streams input @ PipelineData::ByteStream(..) => input, input @ PipelineData::Value(Value::Binary { .. }, ..) => input, input => { diff --git a/crates/nu-command/src/viewers/table.rs b/crates/nu-command/src/viewers/table.rs index 26b8c921c5..2fe9319821 100644 --- a/crates/nu-command/src/viewers/table.rs +++ b/crates/nu-command/src/viewers/table.rs @@ -5,6 +5,7 @@ use lscolors::{LsColors, Style}; use nu_color_config::{color_from_hex, StyleComputer, TextStyle}; use nu_engine::{command_prelude::*, env::get_config, env_to_string}; +use nu_pretty_hex::HexConfig; use nu_protocol::{ ByteStream, Config, DataSource, ListStream, PipelineMetadata, TableMode, ValueIterator, }; @@ -15,7 +16,7 @@ use nu_table::{ use nu_utils::get_ls_colors; use std::{ collections::VecDeque, - io::{Cursor, IsTerminal}, + io::{IsTerminal, Read}, path::PathBuf, str::FromStr, sync::{atomic::AtomicBool, Arc}, @@ -364,16 +365,18 @@ fn handle_table_command( ) -> Result { let span = input.data.span().unwrap_or(input.call.head); match input.data { + // Binary streams should behave as if they really are `binary` data, and printed as hex + PipelineData::ByteStream(stream, _) if stream.type_() == ByteStreamType::Binary => Ok( + PipelineData::ByteStream(pretty_hex_stream(stream, input.call.head), None), + ), PipelineData::ByteStream(..) => Ok(input.data), PipelineData::Value(Value::Binary { val, .. }, ..) 
=> { - let bytes = { - let mut str = nu_pretty_hex::pretty_hex(&val); - str.push('\n'); - str.into_bytes() - }; let ctrlc = input.engine_state.ctrlc.clone(); - let stream = ByteStream::read(Cursor::new(bytes), input.call.head, ctrlc); - Ok(PipelineData::ByteStream(stream, None)) + let stream = ByteStream::read_binary(val, input.call.head, ctrlc); + Ok(PipelineData::ByteStream( + pretty_hex_stream(stream, input.call.head), + None, + )) } // None of these two receive a StyleComputer because handle_row_stream() can produce it by itself using engine_state and stack. PipelineData::Value(Value::List { vals, .. }, metadata) => { @@ -410,6 +413,70 @@ fn handle_table_command( } } +fn pretty_hex_stream(stream: ByteStream, span: Span) -> ByteStream { + let mut cfg = HexConfig { + // We are going to render the title manually first + title: true, + // If building on 32-bit, the stream size might be bigger than a usize + length: stream.known_size().and_then(|sz| sz.try_into().ok()), + ..HexConfig::default() + }; + + // This won't really work for us + debug_assert!(cfg.width > 0, "the default hex config width was zero"); + + let mut read_buf = Vec::with_capacity(cfg.width); + + let mut reader = if let Some(reader) = stream.reader() { + reader + } else { + // No stream to read from + return ByteStream::read_string("".into(), span, None); + }; + + ByteStream::from_fn(span, None, ByteStreamType::String, move |buffer| { + // Turn the buffer into a String we can write to + let mut write_buf = std::mem::take(buffer); + write_buf.clear(); + // SAFETY: we just truncated it empty + let mut write_buf = unsafe { String::from_utf8_unchecked(write_buf) }; + + // Write the title at the beginning + if cfg.title { + nu_pretty_hex::write_title(&mut write_buf, cfg, true).expect("format error"); + cfg.title = false; + + // Put the write_buf back into buffer + *buffer = write_buf.into_bytes(); + + Ok(true) + } else { + // Read up to `cfg.width` bytes + read_buf.clear(); + (&mut reader) + .take(cfg.width as u64) + .read_to_end(&mut read_buf) + .err_span(span)?; + + if !read_buf.is_empty() { + nu_pretty_hex::hex_write(&mut write_buf, &read_buf, cfg, Some(true)) + .expect("format error"); + write_buf.push('\n'); + + // Advance the address offset for next time + cfg.address_offset += read_buf.len(); + + // Put the write_buf back into buffer + *buffer = write_buf.into_bytes(); + + Ok(true) + } else { + Ok(false) + } + } + }) +} + fn handle_record( input: CmdInput, cfg: TableConfig, @@ -608,7 +675,8 @@ fn handle_row_stream( ctrlc.clone(), cfg, ); - let stream = ByteStream::from_result_iter(paginator, input.call.head, None); + let stream = + ByteStream::from_result_iter(paginator, input.call.head, None, ByteStreamType::String); Ok(PipelineData::ByteStream(stream, None)) } diff --git a/crates/nu-command/tests/commands/bytes/collect.rs b/crates/nu-command/tests/commands/bytes/collect.rs new file mode 100644 index 0000000000..768ab16df4 --- /dev/null +++ b/crates/nu-command/tests/commands/bytes/collect.rs @@ -0,0 +1,27 @@ +use nu_test_support::{nu, pipeline}; + +#[test] +fn test_stream() { + let actual = nu!(pipeline( + " + [0x[01] 0x[02] 0x[03] 0x[04]] + | filter {true} + | bytes collect 0x[aa aa] + | encode hex + " + )); + assert_eq!(actual.out, "01AAAA02AAAA03AAAA04"); +} + +#[test] +fn test_stream_type() { + let actual = nu!(pipeline( + " + [0x[01] 0x[02] 0x[03] 0x[04]] + | filter {true} + | bytes collect 0x[00] + | describe -n + " + )); + assert_eq!(actual.out, "binary (stream)"); +} diff --git 
a/crates/nu-command/tests/commands/bytes/mod.rs b/crates/nu-command/tests/commands/bytes/mod.rs new file mode 100644 index 0000000000..10b2a494f8 --- /dev/null +++ b/crates/nu-command/tests/commands/bytes/mod.rs @@ -0,0 +1 @@ +mod collect; diff --git a/crates/nu-command/tests/commands/first.rs b/crates/nu-command/tests/commands/first.rs index e01478f820..23ccda6669 100644 --- a/crates/nu-command/tests/commands/first.rs +++ b/crates/nu-command/tests/commands/first.rs @@ -68,6 +68,20 @@ fn gets_first_byte() { assert_eq!(actual.out, "170"); } +#[test] +fn gets_first_bytes_from_stream() { + let actual = nu!("(1.. | each { 0x[aa bb cc] } | bytes collect | first 2) == 0x[aa bb]"); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn gets_first_byte_from_stream() { + let actual = nu!("1.. | each { 0x[aa bb cc] } | bytes collect | first"); + + assert_eq!(actual.out, "170"); +} + #[test] // covers a situation where `first` used to behave strangely on list input fn works_with_binary_list() { diff --git a/crates/nu-command/tests/commands/last.rs b/crates/nu-command/tests/commands/last.rs index b0c67e49be..986b433ea7 100644 --- a/crates/nu-command/tests/commands/last.rs +++ b/crates/nu-command/tests/commands/last.rs @@ -68,6 +68,20 @@ fn gets_last_byte() { assert_eq!(actual.out, "204"); } +#[test] +fn gets_last_bytes_from_stream() { + let actual = nu!("(1..10 | each { 0x[aa bb cc] } | bytes collect | last 2) == 0x[bb cc]"); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn gets_last_byte_from_stream() { + let actual = nu!("1..10 | each { 0x[aa bb cc] } | bytes collect | last"); + + assert_eq!(actual.out, "204"); +} + #[test] fn last_errors_on_negative_index() { let actual = nu!("[1, 2, 3] | last -2"); diff --git a/crates/nu-command/tests/commands/mod.rs b/crates/nu-command/tests/commands/mod.rs index d7215e002b..922e804405 100644 --- a/crates/nu-command/tests/commands/mod.rs +++ b/crates/nu-command/tests/commands/mod.rs @@ -4,6 +4,7 @@ mod any; mod append; mod assignment; mod break_; +mod bytes; mod cal; mod cd; mod compact; diff --git a/crates/nu-command/tests/commands/skip/skip_.rs b/crates/nu-command/tests/commands/skip/skip_.rs index 790c58db4e..c98de496c8 100644 --- a/crates/nu-command/tests/commands/skip/skip_.rs +++ b/crates/nu-command/tests/commands/skip/skip_.rs @@ -1,13 +1,17 @@ use nu_test_support::nu; #[test] -fn binary_skip_will_raise_error() { - let actual = nu!( - cwd: "tests/fixtures/formats", - "open sample_data.ods --raw | skip 2" - ); +fn skips_bytes() { + let actual = nu!("(0x[aa bb cc] | skip 2) == 0x[cc]"); - assert!(actual.err.contains("only_supports_this_input_type")); + assert_eq!(actual.out, "true"); +} + +#[test] +fn skips_bytes_from_stream() { + let actual = nu!("([0 1] | each { 0x[aa bb cc] } | bytes collect | skip 2) == 0x[cc aa bb cc]"); + + assert_eq!(actual.out, "true"); } #[test] diff --git a/crates/nu-command/tests/commands/str_/collect.rs b/crates/nu-command/tests/commands/str_/join.rs similarity index 65% rename from crates/nu-command/tests/commands/str_/collect.rs rename to crates/nu-command/tests/commands/str_/join.rs index 154ce30537..e04652e810 100644 --- a/crates/nu-command/tests/commands/str_/collect.rs +++ b/crates/nu-command/tests/commands/str_/join.rs @@ -22,6 +22,18 @@ fn test_2() { assert_eq!(actual.out, "abcd"); } +#[test] +fn test_stream() { + let actual = nu!("[a b c d] | filter {true} | str join ."); + assert_eq!(actual.out, "a.b.c.d"); +} + +#[test] +fn test_stream_type() { + let actual = nu!("[a b c d] | filter {true} | str join . 
| describe -n"); + assert_eq!(actual.out, "string (stream)"); +} + #[test] fn construct_a_path() { let actual = nu!(pipeline( diff --git a/crates/nu-command/tests/commands/str_/mod.rs b/crates/nu-command/tests/commands/str_/mod.rs index 9f1e90e853..9efa28b1ef 100644 --- a/crates/nu-command/tests/commands/str_/mod.rs +++ b/crates/nu-command/tests/commands/str_/mod.rs @@ -1,5 +1,5 @@ -mod collect; mod into_string; +mod join; use nu_test_support::fs::Stub::FileWithContent; use nu_test_support::playground::Playground; diff --git a/crates/nu-command/tests/commands/take/rows.rs b/crates/nu-command/tests/commands/take/rows.rs index d5f3d1c601..6c34b61310 100644 --- a/crates/nu-command/tests/commands/take/rows.rs +++ b/crates/nu-command/tests/commands/take/rows.rs @@ -35,6 +35,20 @@ fn fails_on_string() { assert!(actual.err.contains("command doesn't support")); } +#[test] +fn takes_bytes() { + let actual = nu!("(0x[aa bb cc] | take 2) == 0x[aa bb]"); + + assert_eq!(actual.out, "true"); +} + +#[test] +fn takes_bytes_from_stream() { + let actual = nu!("(1.. | each { 0x[aa bb cc] } | bytes collect | take 2) == 0x[aa bb]"); + + assert_eq!(actual.out, "true"); +} + #[test] // covers a situation where `take` used to behave strangely on list input fn works_with_binary_list() { diff --git a/crates/nu-engine/src/command_prelude.rs b/crates/nu-engine/src/command_prelude.rs index 089a2fb8fa..112f280db5 100644 --- a/crates/nu-engine/src/command_prelude.rs +++ b/crates/nu-engine/src/command_prelude.rs @@ -2,7 +2,7 @@ pub use crate::CallExt; pub use nu_protocol::{ ast::{Call, CellPath}, engine::{Command, EngineState, Stack}, - record, Category, ErrSpan, Example, IntoInterruptiblePipelineData, IntoPipelineData, - IntoSpanned, PipelineData, Record, ShellError, Signature, Span, Spanned, SyntaxShape, Type, - Value, + record, ByteStream, ByteStreamType, Category, ErrSpan, Example, IntoInterruptiblePipelineData, + IntoPipelineData, IntoSpanned, PipelineData, Record, ShellError, Signature, Span, Spanned, + SyntaxShape, Type, Value, }; diff --git a/crates/nu-plugin-core/src/interface/mod.rs b/crates/nu-plugin-core/src/interface/mod.rs index b4a2bc9a25..4f287f39c0 100644 --- a/crates/nu-plugin-core/src/interface/mod.rs +++ b/crates/nu-plugin-core/src/interface/mod.rs @@ -183,7 +183,7 @@ pub trait InterfaceManager { PipelineDataHeader::ByteStream(info) => { let handle = self.stream_manager().get_handle(); let reader = handle.read_stream(info.id, self.get_interface())?; - ByteStream::from_result_iter(reader, info.span, ctrlc.cloned()).into() + ByteStream::from_result_iter(reader, info.span, ctrlc.cloned(), info.type_).into() } }) } @@ -261,9 +261,10 @@ pub trait Interface: Clone + Send { } PipelineData::ByteStream(stream, ..) 
=> { let span = stream.span(); + let type_ = stream.type_(); if let Some(reader) = stream.reader() { let (id, writer) = new_stream(RAW_STREAM_HIGH_PRESSURE)?; - let header = PipelineDataHeader::ByteStream(ByteStreamInfo { id, span }); + let header = PipelineDataHeader::ByteStream(ByteStreamInfo { id, span, type_ }); Ok((header, PipelineDataWriter::ByteStream(writer, reader))) } else { Ok((PipelineDataHeader::Empty, PipelineDataWriter::None)) diff --git a/crates/nu-plugin-core/src/interface/tests.rs b/crates/nu-plugin-core/src/interface/tests.rs index fb3d737190..e318a2648e 100644 --- a/crates/nu-plugin-core/src/interface/tests.rs +++ b/crates/nu-plugin-core/src/interface/tests.rs @@ -10,8 +10,8 @@ use nu_plugin_protocol::{ StreamMessage, }; use nu_protocol::{ - ByteStream, ByteStreamSource, DataSource, ListStream, PipelineData, PipelineMetadata, - ShellError, Span, Value, + ByteStream, ByteStreamSource, ByteStreamType, DataSource, ListStream, PipelineData, + PipelineMetadata, ShellError, Span, Value, }; use std::{path::Path, sync::Arc}; @@ -208,6 +208,7 @@ fn read_pipeline_data_byte_stream() -> Result<(), ShellError> { let header = PipelineDataHeader::ByteStream(ByteStreamInfo { id: 12, span: test_span, + type_: ByteStreamType::Unknown, }); let pipe = manager.read_pipeline_data(header, None)?; @@ -401,7 +402,12 @@ fn write_pipeline_data_byte_stream() -> Result<(), ShellError> { // Set up pipeline data for a byte stream let data = PipelineData::ByteStream( - ByteStream::read(std::io::Cursor::new(expected), span, None), + ByteStream::read( + std::io::Cursor::new(expected), + span, + None, + ByteStreamType::Unknown, + ), None, ); diff --git a/crates/nu-plugin-engine/src/interface/tests.rs b/crates/nu-plugin-engine/src/interface/tests.rs index aca59a664e..e718886b3b 100644 --- a/crates/nu-plugin-engine/src/interface/tests.rs +++ b/crates/nu-plugin-engine/src/interface/tests.rs @@ -17,8 +17,8 @@ use nu_plugin_protocol::{ use nu_protocol::{ ast::{Math, Operator}, engine::Closure, - CustomValue, IntoInterruptiblePipelineData, IntoSpanned, PipelineData, PluginSignature, - ShellError, Span, Spanned, Value, + ByteStreamType, CustomValue, IntoInterruptiblePipelineData, IntoSpanned, PipelineData, + PluginSignature, ShellError, Span, Spanned, Value, }; use serde::{Deserialize, Serialize}; use std::{ @@ -157,6 +157,7 @@ fn manager_consume_all_propagates_message_error_to_readers() -> Result<(), Shell PipelineDataHeader::ByteStream(ByteStreamInfo { id: 0, span: Span::test_data(), + type_: ByteStreamType::Unknown, }), None, )?; @@ -384,6 +385,7 @@ fn manager_consume_call_response_registers_streams() -> Result<(), ShellError> { PluginCallResponse::PipelineData(PipelineDataHeader::ByteStream(ByteStreamInfo { id: 1, span: Span::test_data(), + type_: ByteStreamType::Unknown, })), ))?; diff --git a/crates/nu-plugin-protocol/src/lib.rs b/crates/nu-plugin-protocol/src/lib.rs index ea27f82654..db19ee02f6 100644 --- a/crates/nu-plugin-protocol/src/lib.rs +++ b/crates/nu-plugin-protocol/src/lib.rs @@ -22,8 +22,8 @@ mod tests; pub mod test_util; use nu_protocol::{ - ast::Operator, engine::Closure, Config, LabeledError, PipelineData, PluginSignature, - ShellError, Span, Spanned, Value, + ast::Operator, engine::Closure, ByteStreamType, Config, LabeledError, PipelineData, + PluginSignature, ShellError, Span, Spanned, Value, }; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -112,6 +112,8 @@ pub struct ListStreamInfo { pub struct ByteStreamInfo { pub id: StreamId, pub span: Span, + #[serde(rename = 
"type")] + pub type_: ByteStreamType, } /// Calls that a plugin can execute. The type parameter determines the input type. diff --git a/crates/nu-plugin/src/plugin/interface/tests.rs b/crates/nu-plugin/src/plugin/interface/tests.rs index ed04190712..6c3dfdf6c9 100644 --- a/crates/nu-plugin/src/plugin/interface/tests.rs +++ b/crates/nu-plugin/src/plugin/interface/tests.rs @@ -9,8 +9,8 @@ use nu_plugin_protocol::{ PluginCustomValue, PluginInput, PluginOutput, Protocol, ProtocolInfo, StreamData, }; use nu_protocol::{ - engine::Closure, Config, CustomValue, IntoInterruptiblePipelineData, LabeledError, - PipelineData, PluginSignature, ShellError, Span, Spanned, Value, + engine::Closure, ByteStreamType, Config, CustomValue, IntoInterruptiblePipelineData, + LabeledError, PipelineData, PluginSignature, ShellError, Span, Spanned, Value, }; use std::{ collections::HashMap, @@ -160,6 +160,7 @@ fn manager_consume_all_propagates_message_error_to_readers() -> Result<(), Shell PipelineDataHeader::ByteStream(ByteStreamInfo { id: 0, span: Span::test_data(), + type_: ByteStreamType::Unknown, }), None, )?; diff --git a/crates/nu-pretty-hex/src/pretty_hex.rs b/crates/nu-pretty-hex/src/pretty_hex.rs index 81bd5451c4..2fab2a9b43 100644 --- a/crates/nu-pretty-hex/src/pretty_hex.rs +++ b/crates/nu-pretty-hex/src/pretty_hex.rs @@ -174,20 +174,14 @@ where .collect(); if cfg.title { - if use_color { - writeln!( - writer, - "Length: {0} (0x{0:x}) bytes | {1}printable {2}whitespace {3}ascii_other {4}non_ascii{5}", - source_part_vec.len(), - Style::default().fg(Color::Cyan).bold().prefix(), - Style::default().fg(Color::Green).bold().prefix(), - Style::default().fg(Color::Purple).bold().prefix(), - Style::default().fg(Color::Yellow).bold().prefix(), - Style::default().fg(Color::Yellow).suffix() - )?; - } else { - writeln!(writer, "Length: {0} (0x{0:x}) bytes", source_part_vec.len(),)?; - } + write_title( + writer, + HexConfig { + length: Some(source_part_vec.len()), + ..cfg + }, + use_color, + )?; } let lines = source_part_vec.chunks(if cfg.width > 0 { @@ -256,6 +250,34 @@ where Ok(()) } +/// Write the title for the given config. The length will be taken from `cfg.length`. +pub fn write_title(writer: &mut W, cfg: HexConfig, use_color: bool) -> Result<(), fmt::Error> +where + W: fmt::Write, +{ + let write = |writer: &mut W, length: fmt::Arguments<'_>| { + if use_color { + writeln!( + writer, + "Length: {length} | {0}printable {1}whitespace {2}ascii_other {3}non_ascii{4}", + Style::default().fg(Color::Cyan).bold().prefix(), + Style::default().fg(Color::Green).bold().prefix(), + Style::default().fg(Color::Purple).bold().prefix(), + Style::default().fg(Color::Yellow).bold().prefix(), + Style::default().fg(Color::Yellow).suffix() + ) + } else { + writeln!(writer, "Length: {length}") + } + }; + + if let Some(len) = cfg.length { + write(writer, format_args!("{len} (0x{len:x}) bytes")) + } else { + write(writer, format_args!("unknown (stream)")) + } +} + /// Reference wrapper for use in arguments formatting. pub struct Hex<'a, T: 'a>(&'a T, HexConfig); diff --git a/crates/nu-protocol/src/errors/shell_error.rs b/crates/nu-protocol/src/errors/shell_error.rs index 525f32e925..81139d1a52 100644 --- a/crates/nu-protocol/src/errors/shell_error.rs +++ b/crates/nu-protocol/src/errors/shell_error.rs @@ -1017,7 +1017,10 @@ pub enum ShellError { /// /// Check your input's encoding. Are there any funny characters/bytes? 
#[error("Non-UTF8 string")] - #[diagnostic(code(nu::parser::non_utf8))] + #[diagnostic( + code(nu::parser::non_utf8), + help("see `decode` for handling character sets other than UTF-8") + )] NonUtf8 { #[label("non-UTF8 string")] span: Span, @@ -1029,7 +1032,10 @@ pub enum ShellError { /// /// Check your input's encoding. Are there any funny characters/bytes? #[error("Non-UTF8 string")] - #[diagnostic(code(nu::parser::non_utf8_custom))] + #[diagnostic( + code(nu::parser::non_utf8_custom), + help("see `decode` for handling character sets other than UTF-8") + )] NonUtf8Custom { msg: String, #[label("{msg}")] diff --git a/crates/nu-protocol/src/pipeline/byte_stream.rs b/crates/nu-protocol/src/pipeline/byte_stream.rs index 64b566a625..e77c2cc855 100644 --- a/crates/nu-protocol/src/pipeline/byte_stream.rs +++ b/crates/nu-protocol/src/pipeline/byte_stream.rs @@ -1,6 +1,8 @@ +use serde::{Deserialize, Serialize}; + use crate::{ process::{ChildPipe, ChildProcess, ExitStatus}, - ErrSpan, IntoSpanned, OutDest, PipelineData, ShellError, Span, Value, + ErrSpan, IntoSpanned, OutDest, PipelineData, ShellError, Span, Type, Value, }; #[cfg(unix)] use std::os::fd::OwnedFd; @@ -41,6 +43,24 @@ impl ByteStreamSource { }), } } + + /// Source is a `Child` or `File`, rather than `Read`. Currently affects trimming + fn is_external(&self) -> bool { + matches!( + self, + ByteStreamSource::File(..) | ByteStreamSource::Child(..) + ) + } +} + +impl Debug for ByteStreamSource { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ByteStreamSource::Read(_) => f.debug_tuple("Read").field(&"..").finish(), + ByteStreamSource::File(file) => f.debug_tuple("File").field(file).finish(), + ByteStreamSource::Child(child) => f.debug_tuple("Child").field(child).finish(), + } + } } enum SourceReader { @@ -57,6 +77,55 @@ impl Read for SourceReader { } } +impl Debug for SourceReader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SourceReader::Read(_) => f.debug_tuple("Read").field(&"..").finish(), + SourceReader::File(file) => f.debug_tuple("File").field(file).finish(), + } + } +} + +/// Optional type color for [`ByteStream`], which determines type compatibility. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +pub enum ByteStreamType { + /// Compatible with [`Type::Binary`], and should only be converted to binary, even when the + /// desired type is unknown. + Binary, + /// Compatible with [`Type::String`], and should only be converted to string, even when the + /// desired type is unknown. + /// + /// This does not guarantee valid UTF-8 data, but it is conventionally so. Converting to + /// `String` still requires validation of the data. + String, + /// Unknown whether the stream should contain binary or string data. This usually is the result + /// of an external stream, e.g. an external command or file. + #[default] + Unknown, +} + +impl ByteStreamType { + /// Returns the string that describes the byte stream type - i.e., the same as what `describe` + /// produces. This can be used in type mismatch error messages. 
+ pub fn describe(self) -> &'static str { + match self { + ByteStreamType::Binary => "binary (stream)", + ByteStreamType::String => "string (stream)", + ByteStreamType::Unknown => "byte stream", + } + } +} + +impl From for Type { + fn from(value: ByteStreamType) -> Self { + match value { + ByteStreamType::Binary => Type::Binary, + ByteStreamType::String => Type::String, + ByteStreamType::Unknown => Type::Any, + } + } +} + /// A potentially infinite, interruptible stream of bytes. /// /// To create a [`ByteStream`], you can use any of the following methods: @@ -65,20 +134,31 @@ impl Read for SourceReader { /// - [`from_iter`](ByteStream::from_iter): takes an [`Iterator`] whose items implement `AsRef<[u8]>`. /// - [`from_result_iter`](ByteStream::from_result_iter): same as [`from_iter`](ByteStream::from_iter), /// but each item is a `Result`. +/// - [`from_fn`](ByteStream::from_fn): uses a generator function to fill a buffer whenever it is +/// empty. This has high performance because it doesn't need to allocate for each chunk of data, +/// and can just reuse the same buffer. +/// +/// Byte streams have a [type](.type_()) which is used to preserve type compatibility when they +/// are the result of an internal command. It is important that this be set to the correct value. +/// [`Unknown`](ByteStreamType::Unknown) is used only for external sources where the type can not +/// be inherently determined, and having it automatically act as a string or binary depending on +/// whether it parses as UTF-8 or not is desirable. /// /// The data of a [`ByteStream`] can be accessed using one of the following methods: /// - [`reader`](ByteStream::reader): returns a [`Read`]-able type to get the raw bytes in the stream. /// - [`lines`](ByteStream::lines): splits the bytes on lines and returns an [`Iterator`] /// where each item is a `Result`. -/// - [`chunks`](ByteStream::chunks): returns an [`Iterator`] of [`Value`]s where each value is either a string or binary. +/// - [`chunks`](ByteStream::chunks): returns an [`Iterator`] of [`Value`]s where each value is +/// either a string or binary. /// Try not to use this method if possible. Rather, please use [`reader`](ByteStream::reader) /// (or [`lines`](ByteStream::lines) if it matches the situation). /// /// Additionally, there are few methods to collect a [`Bytestream`] into memory: /// - [`into_bytes`](ByteStream::into_bytes): collects all bytes into a [`Vec`]. /// - [`into_string`](ByteStream::into_string): collects all bytes into a [`String`], erroring if utf-8 decoding failed. -/// - [`into_value`](ByteStream::into_value): collects all bytes into a string [`Value`]. -/// If utf-8 decoding failed, then a binary [`Value`] is returned instead. +/// - [`into_value`](ByteStream::into_value): collects all bytes into a value typed appropriately +/// for the [type](.type_()) of this stream. If the type is [`Unknown`](ByteStreamType::Unknown), +/// it will produce a string value if the data is valid UTF-8, or a binary value otherwise. /// /// There are also a few other methods to consume all the data of a [`Bytestream`]: /// - [`drain`](ByteStream::drain): consumes all bytes and outputs nothing. @@ -88,54 +168,135 @@ impl Read for SourceReader { /// /// Internally, [`ByteStream`]s currently come in three flavors according to [`ByteStreamSource`]. /// See its documentation for more information. 
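///
/// A minimal usage sketch, assuming a `span: Span` is in scope and using the constructors
/// described above:
///
/// ```ignore
/// // An in-memory string is typed as `String` up front.
/// let stream = ByteStream::read_string("hello\n".into(), span, None);
/// assert_eq!(stream.type_(), ByteStreamType::String);
///
/// // Collection honors the type; only file/child streams have a trailing newline trimmed.
/// assert_eq!(stream.into_value()?, Value::string("hello\n", span));
/// ```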
+#[derive(Debug)] pub struct ByteStream { stream: ByteStreamSource, span: Span, ctrlc: Option>, + type_: ByteStreamType, known_size: Option, } impl ByteStream { /// Create a new [`ByteStream`] from a [`ByteStreamSource`]. - pub fn new(stream: ByteStreamSource, span: Span, interrupt: Option>) -> Self { + pub fn new( + stream: ByteStreamSource, + span: Span, + interrupt: Option>, + type_: ByteStreamType, + ) -> Self { Self { stream, span, ctrlc: interrupt, + type_, known_size: None, } } - /// Create a new [`ByteStream`] from a [`ByteStreamSource::Read`]. + /// Create a [`ByteStream`] from an arbitrary reader. The type must be provided. pub fn read( reader: impl Read + Send + 'static, span: Span, interrupt: Option>, + type_: ByteStreamType, ) -> Self { - Self::new(ByteStreamSource::Read(Box::new(reader)), span, interrupt) + Self::new( + ByteStreamSource::Read(Box::new(reader)), + span, + interrupt, + type_, + ) } - /// Create a new [`ByteStream`] from a [`ByteStreamSource::File`]. + /// Create a [`ByteStream`] from a string. The type of the stream is always `String`. + pub fn read_string(string: String, span: Span, interrupt: Option>) -> Self { + let len = string.len(); + ByteStream::read( + Cursor::new(string.into_bytes()), + span, + interrupt, + ByteStreamType::String, + ) + .with_known_size(Some(len as u64)) + } + + /// Create a [`ByteStream`] from a byte vector. The type of the stream is always `Binary`. + pub fn read_binary(bytes: Vec, span: Span, interrupt: Option>) -> Self { + let len = bytes.len(); + ByteStream::read(Cursor::new(bytes), span, interrupt, ByteStreamType::Binary) + .with_known_size(Some(len as u64)) + } + + /// Create a [`ByteStream`] from a file. + /// + /// The type is implicitly `Unknown`, as it's not typically known whether files will + /// return text or binary. pub fn file(file: File, span: Span, interrupt: Option>) -> Self { - Self::new(ByteStreamSource::File(file), span, interrupt) + Self::new( + ByteStreamSource::File(file), + span, + interrupt, + ByteStreamType::Unknown, + ) } - /// Create a new [`ByteStream`] from a [`ByteStreamSource::Child`]. + /// Create a [`ByteStream`] from a child process's stdout and stderr. + /// + /// The type is implicitly `Unknown`, as it's not typically known whether child processes will + /// return text or binary. pub fn child(child: ChildProcess, span: Span) -> Self { - Self::new(ByteStreamSource::Child(Box::new(child)), span, None) + Self::new( + ByteStreamSource::Child(Box::new(child)), + span, + None, + ByteStreamType::Unknown, + ) } - /// Create a new [`ByteStream`] that reads from stdin. + /// Create a [`ByteStream`] that reads from stdin. + /// + /// The type is implicitly `Unknown`, as it's not typically known whether stdin is text or + /// binary. pub fn stdin(span: Span) -> Result { let stdin = os_pipe::dup_stdin().err_span(span)?; let source = ByteStreamSource::File(convert_file(stdin)); - Ok(Self::new(source, span, None)) + Ok(Self::new(source, span, None, ByteStreamType::Unknown)) + } + + /// Create a [`ByteStream`] from a generator function that writes data to the given buffer + /// when called, and returns `Ok(false)` on end of stream. 
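+    ///
+    /// A minimal sketch of a generator, assuming a `span: Span` is in scope: it fills the
+    /// reused buffer with three string chunks, then signals end-of-stream with `Ok(false)`.
+    ///
+    /// ```ignore
+    /// let mut n = 0;
+    /// let stream = ByteStream::from_fn(span, None, ByteStreamType::String, move |buffer| {
+    ///     n += 1;
+    ///     if n <= 3 {
+    ///         buffer.extend_from_slice(format!("chunk {n}\n").as_bytes());
+    ///         Ok(true) // more data may follow
+    ///     } else {
+    ///         Ok(false) // end of stream
+    ///     }
+    /// });
+    /// ```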
+ pub fn from_fn( + span: Span, + interrupt: Option>, + type_: ByteStreamType, + generator: impl FnMut(&mut Vec) -> Result + Send + 'static, + ) -> Self { + Self::read( + ReadGenerator { + buffer: Cursor::new(Vec::new()), + generator, + }, + span, + interrupt, + type_, + ) + } + + pub fn with_type(mut self, type_: ByteStreamType) -> Self { + self.type_ = type_; + self } /// Create a new [`ByteStream`] from an [`Iterator`] of bytes slices. /// /// The returned [`ByteStream`] will have a [`ByteStreamSource`] of `Read`. - pub fn from_iter(iter: I, span: Span, interrupt: Option>) -> Self + pub fn from_iter( + iter: I, + span: Span, + interrupt: Option>, + type_: ByteStreamType, + ) -> Self where I: IntoIterator, I::IntoIter: Send + 'static, @@ -143,13 +304,18 @@ impl ByteStream { { let iter = iter.into_iter(); let cursor = Some(Cursor::new(I::Item::default())); - Self::read(ReadIterator { iter, cursor }, span, interrupt) + Self::read(ReadIterator { iter, cursor }, span, interrupt, type_) } /// Create a new [`ByteStream`] from an [`Iterator`] of [`Result`] bytes slices. /// /// The returned [`ByteStream`] will have a [`ByteStreamSource`] of `Read`. - pub fn from_result_iter(iter: I, span: Span, interrupt: Option>) -> Self + pub fn from_result_iter( + iter: I, + span: Span, + interrupt: Option>, + type_: ByteStreamType, + ) -> Self where I: IntoIterator>, I::IntoIter: Send + 'static, @@ -157,7 +323,7 @@ impl ByteStream { { let iter = iter.into_iter(); let cursor = Some(Cursor::new(T::default())); - Self::read(ReadResultIterator { iter, cursor }, span, interrupt) + Self::read(ReadResultIterator { iter, cursor }, span, interrupt, type_) } /// Set the known size, in number of bytes, of the [`ByteStream`]. @@ -181,6 +347,11 @@ impl ByteStream { self.span } + /// Returns the [`ByteStreamType`] associated with the [`ByteStream`]. + pub fn type_(&self) -> ByteStreamType { + self.type_ + } + /// Returns the known size, in number of bytes, of the [`ByteStream`]. pub fn known_size(&self) -> Option { self.known_size @@ -220,8 +391,10 @@ impl ByteStream { /// Convert the [`ByteStream`] into a [`Chunks`] iterator where each element is a `Result`. /// /// Each call to [`next`](Iterator::next) reads the currently available data from the byte stream source, - /// up to a maximum size. If the chunk of bytes, or an expected portion of it, succeeds utf-8 decoding, - /// then it is returned as a [`Value::String`]. Otherwise, it is turned into a [`Value::Binary`]. + /// up to a maximum size. The values are typed according to the [type](.type_()) of the + /// stream, and if that type is [`Unknown`](ByteStreamType::Unknown), string values will be + /// produced as long as the stream continues to parse as valid UTF-8, but binary values will + /// be produced instead of the stream fails to parse as UTF-8 instead at any point. /// Any and all newlines are kept intact in each chunk. /// /// Where possible, prefer [`reader`](ByteStream::reader) or [`lines`](ByteStream::lines) over this method. @@ -232,12 +405,7 @@ impl ByteStream { /// then the stream is considered empty and `None` will be returned. pub fn chunks(self) -> Option { let reader = self.stream.reader()?; - Some(Chunks { - reader: BufReader::new(reader), - span: self.span, - ctrlc: self.ctrlc, - leftover: Vec::new(), - }) + Some(Chunks::new(reader, self.span, self.ctrlc, self.type_)) } /// Convert the [`ByteStream`] into its inner [`ByteStreamSource`]. 
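A minimal sketch of the `Unknown` fallback that `chunks` performs, mirroring the
`chunks_read_unknown_fallback` unit test later in this patch (assumes a `span: Span` is in scope):

    let parts = vec![&b"Nushell"[..], &b"\x9c\x80\xe9abcd"[..]];
    let stream = ByteStream::from_iter(parts, span, None, ByteStreamType::Unknown);
    let mut chunks = stream.chunks().expect("stream has a reader");
    // The first chunk parses as UTF-8 and comes out as a string value...
    assert_eq!(chunks.next().unwrap()?, Value::string("Nushell", span));
    // ...the second does not, so the iterator switches to producing binary values.
    assert_eq!(chunks.next().unwrap()?, Value::binary(b"\x9c\x80\xe9abcd".to_vec(), span));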
@@ -305,33 +473,64 @@ impl ByteStream { } } - /// Collect all the bytes of the [`ByteStream`] into a [`String`]. + /// Collect the stream into a `String` in-memory. This can only succeed if the data contained is + /// valid UTF-8. /// - /// The trailing new line (`\n` or `\r\n`), if any, is removed from the [`String`] prior to being returned. + /// The trailing new line (`\n` or `\r\n`), if any, is removed from the [`String`] prior to + /// being returned, if this is a stream coming from an external process or file. /// - /// If utf-8 decoding fails, an error is returned. + /// If the [type](.type_()) is specified as `Binary`, this operation always fails, even if the + /// data would have been valid UTF-8. pub fn into_string(self) -> Result { let span = self.span; - let bytes = self.into_bytes()?; - let mut string = String::from_utf8(bytes).map_err(|_| ShellError::NonUtf8 { span })?; - trim_end_newline(&mut string); - Ok(string) + if self.type_ != ByteStreamType::Binary { + let trim = self.stream.is_external(); + let bytes = self.into_bytes()?; + let mut string = String::from_utf8(bytes).map_err(|err| ShellError::NonUtf8Custom { + span, + msg: err.to_string(), + })?; + if trim { + trim_end_newline(&mut string); + } + Ok(string) + } else { + Err(ShellError::TypeMismatch { + err_message: "expected string, but got binary".into(), + span, + }) + } } /// Collect all the bytes of the [`ByteStream`] into a [`Value`]. /// - /// If the collected bytes are successfully decoded as utf-8, then a [`Value::String`] is returned. - /// The trailing new line (`\n` or `\r\n`), if any, is removed from the [`String`] prior to being returned. - /// Otherwise, a [`Value::Binary`] is returned with any trailing new lines preserved. + /// If this is a `String` stream, the stream is decoded to UTF-8. If the stream came from an + /// external process or file, the trailing new line (`\n` or `\r\n`), if any, is removed from + /// the [`String`] prior to being returned. + /// + /// If this is a `Binary` stream, a [`Value::Binary`] is returned with any trailing new lines + /// preserved. + /// + /// If this is an `Unknown` stream, the behavior depends on whether the stream parses as valid + /// UTF-8 or not. If it does, this is uses the `String` behavior; if not, it uses the `Binary` + /// behavior. pub fn into_value(self) -> Result { let span = self.span; - let bytes = self.into_bytes()?; - let value = match String::from_utf8(bytes) { - Ok(mut str) => { - trim_end_newline(&mut str); - Value::string(str, span) - } - Err(err) => Value::binary(err.into_bytes(), span), + let trim = self.stream.is_external(); + let value = match self.type_ { + // If the type is specified, then the stream should always become that type: + ByteStreamType::Binary => Value::binary(self.into_bytes()?, span), + ByteStreamType::String => Value::string(self.into_string()?, span), + // If the type is not specified, then it just depends on whether it parses or not: + ByteStreamType::Unknown => match String::from_utf8(self.into_bytes()?) 
{ + Ok(mut str) => { + if trim { + trim_end_newline(&mut str); + } + Value::string(str, span) + } + Err(err) => Value::binary(err.into_bytes(), span), + }, }; Ok(value) } @@ -477,12 +676,6 @@ impl ByteStream { } } -impl Debug for ByteStream { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ByteStream").finish() - } -} - impl From for PipelineData { fn from(stream: ByteStream) -> Self { Self::ByteStream(stream, None) @@ -613,54 +806,157 @@ impl Iterator for Lines { } } +/// Turn a readable stream into [`Value`]s. +/// +/// The `Value` type depends on the type of the stream ([`ByteStreamType`]). If `Unknown`, the +/// stream will return strings as long as UTF-8 parsing succeeds, but will start returning binary +/// if it fails. pub struct Chunks { reader: BufReader, + pos: u64, + error: bool, span: Span, ctrlc: Option>, - leftover: Vec, + type_: ByteStreamType, } impl Chunks { + fn new( + reader: SourceReader, + span: Span, + ctrlc: Option>, + type_: ByteStreamType, + ) -> Self { + Self { + reader: BufReader::new(reader), + pos: 0, + error: false, + span, + ctrlc, + type_, + } + } + pub fn span(&self) -> Span { self.span } + + fn next_string(&mut self) -> Result, (Vec, ShellError)> { + // Get some data from the reader + let buf = self + .reader + .fill_buf() + .err_span(self.span) + .map_err(|err| (vec![], ShellError::from(err)))?; + + // If empty, this is EOF + if buf.is_empty() { + return Ok(None); + } + + let mut buf = buf.to_vec(); + let mut consumed = 0; + + // If the buf length is under 4 bytes, it could be invalid, so try to get more + if buf.len() < 4 { + consumed += buf.len(); + self.reader.consume(buf.len()); + match self.reader.fill_buf().err_span(self.span) { + Ok(more_bytes) => buf.extend_from_slice(more_bytes), + Err(err) => return Err((buf, err.into())), + } + } + + // Try to parse utf-8 and decide what to do + match String::from_utf8(buf) { + Ok(string) => { + self.reader.consume(string.len() - consumed); + self.pos += string.len() as u64; + Ok(Some(string)) + } + Err(err) if err.utf8_error().error_len().is_none() => { + // There is some valid data at the beginning, and this is just incomplete, so just + // consume that and return it + let valid_up_to = err.utf8_error().valid_up_to(); + if valid_up_to > consumed { + self.reader.consume(valid_up_to - consumed); + } + let mut buf = err.into_bytes(); + buf.truncate(valid_up_to); + buf.shrink_to_fit(); + let string = String::from_utf8(buf) + .expect("failed to parse utf-8 even after correcting error"); + self.pos += string.len() as u64; + Ok(Some(string)) + } + Err(err) => { + // There is an error at the beginning and we have no hope of parsing further. 
+ let shell_error = ShellError::NonUtf8Custom { + msg: format!("invalid utf-8 sequence starting at index {}", self.pos), + span: self.span, + }; + let buf = err.into_bytes(); + // We are consuming the entire buf though, because we're returning it in case it + // will be cast to binary + if buf.len() > consumed { + self.reader.consume(buf.len() - consumed); + } + self.pos += buf.len() as u64; + Err((buf, shell_error)) + } + } + } } impl Iterator for Chunks { type Item = Result; fn next(&mut self) -> Option { - if nu_utils::ctrl_c::was_pressed(&self.ctrlc) { + if self.error || nu_utils::ctrl_c::was_pressed(&self.ctrlc) { None } else { - loop { - match self.reader.fill_buf() { - Ok(buf) => { - self.leftover.extend_from_slice(buf); + match self.type_ { + // Binary should always be binary + ByteStreamType::Binary => { + let buf = match self.reader.fill_buf().err_span(self.span) { + Ok(buf) => buf, + Err(err) => { + self.error = true; + return Some(Err(err.into())); + } + }; + if !buf.is_empty() { let len = buf.len(); + let value = Value::binary(buf, self.span); self.reader.consume(len); - break; - } - Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(err) => return Some(Err(err.into_spanned(self.span).into())), - }; - } - - if self.leftover.is_empty() { - return None; - } - - match String::from_utf8(std::mem::take(&mut self.leftover)) { - Ok(str) => Some(Ok(Value::string(str, self.span))), - Err(err) => { - if err.utf8_error().error_len().is_some() { - Some(Ok(Value::binary(err.into_bytes(), self.span))) + self.pos += len as u64; + Some(Ok(value)) } else { - let i = err.utf8_error().valid_up_to(); - let mut bytes = err.into_bytes(); - self.leftover = bytes.split_off(i); - let str = String::from_utf8(bytes).expect("valid utf8"); - Some(Ok(Value::string(str, self.span))) + None + } + } + // String produces an error if UTF-8 can't be parsed + ByteStreamType::String => match self.next_string().transpose()? { + Ok(string) => Some(Ok(Value::string(string, self.span))), + Err((_, err)) => { + self.error = true; + Some(Err(err)) + } + }, + // For Unknown, we try to create strings, but we switch to binary mode if we + // fail + ByteStreamType::Unknown => { + match self.next_string().transpose()? { + Ok(string) => Some(Ok(Value::string(string, self.span))), + Err((buf, _)) if !buf.is_empty() => { + // Switch to binary mode + self.type_ = ByteStreamType::Binary; + Some(Ok(Value::binary(buf, self.span))) + } + Err((_, err)) => { + self.error = true; + Some(Err(err)) + } } } } @@ -776,11 +1072,58 @@ where Ok(len as u64) } +struct ReadGenerator +where + F: FnMut(&mut Vec) -> Result + Send + 'static, +{ + buffer: Cursor>, + generator: F, +} + +impl BufRead for ReadGenerator +where + F: FnMut(&mut Vec) -> Result + Send + 'static, +{ + fn fill_buf(&mut self) -> std::io::Result<&[u8]> { + // We have to loop, because it's important that we don't leave the buffer empty unless we're + // truly at the end of the stream. + while self.buffer.fill_buf()?.is_empty() { + // Reset the cursor to the beginning and truncate + self.buffer.set_position(0); + self.buffer.get_mut().clear(); + // Ask the generator to generate data + if !(self.generator)(self.buffer.get_mut())? 
{ + // End of stream + break; + } + } + self.buffer.fill_buf() + } + + fn consume(&mut self, amt: usize) { + self.buffer.consume(amt); + } +} + +impl Read for ReadGenerator +where + F: FnMut(&mut Vec) -> Result + Send + 'static, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + // Straightforward implementation on top of BufRead + let slice = self.fill_buf()?; + let len = buf.len().min(slice.len()); + buf[..len].copy_from_slice(&slice[..len]); + self.consume(len); + Ok(len) + } +} + #[cfg(test)] mod tests { use super::*; - fn test_chunks(data: Vec) -> Chunks + fn test_chunks(data: Vec, type_: ByteStreamType) -> Chunks where T: AsRef<[u8]> + Default + Send + 'static, { @@ -788,46 +1131,89 @@ mod tests { iter: data.into_iter(), cursor: Some(Cursor::new(T::default())), }; - Chunks { - reader: BufReader::new(SourceReader::Read(Box::new(reader))), - span: Span::test_data(), - ctrlc: None, - leftover: Vec::new(), - } + Chunks::new( + SourceReader::Read(Box::new(reader)), + Span::test_data(), + None, + type_, + ) } #[test] - fn chunks_read_string() { - let data = vec!["Nushell", "が好きです"]; - let chunks = test_chunks(data.clone()); - let actual = chunks.collect::, _>>().unwrap(); - let expected = data.into_iter().map(Value::test_string).collect::>(); - assert_eq!(expected, actual); - } + fn chunks_read_binary_passthrough() { + let bins = vec![&[0, 1][..], &[2, 3][..]]; + let iter = test_chunks(bins.clone(), ByteStreamType::Binary); - #[test] - fn chunks_read_string_split_utf8() { - let expected = "Nushell最高!"; - let chunks = test_chunks(vec![&b"Nushell\xe6"[..], b"\x9c\x80\xe9", b"\xab\x98!"]); - - let actual = chunks + let bins_values: Vec = bins .into_iter() - .map(|value| value.and_then(Value::into_string)) - .collect::>() - .unwrap(); - - assert_eq!(expected, actual); + .map(|bin| Value::binary(bin, Span::test_data())) + .collect(); + assert_eq!( + bins_values, + iter.collect::, _>>().expect("error") + ); } #[test] - fn chunks_returns_string_or_binary() { - let chunks = test_chunks(vec![b"Nushell".as_slice(), b"\x9c\x80\xe9abcd", b"efgh"]); - let actual = chunks.collect::, _>>().unwrap(); - let expected = vec![ - Value::test_string("Nushell"), - Value::test_binary(b"\x9c\x80\xe9abcd"), - Value::test_string("efgh"), - ]; - assert_eq!(actual, expected) + fn chunks_read_string_clean() { + let strs = vec!["Nushell", "が好きです"]; + let iter = test_chunks(strs.clone(), ByteStreamType::String); + + let strs_values: Vec = strs + .into_iter() + .map(|string| Value::string(string, Span::test_data())) + .collect(); + assert_eq!( + strs_values, + iter.collect::, _>>().expect("error") + ); + } + + #[test] + fn chunks_read_string_split_boundary() { + let real = "Nushell最高!"; + let chunks = vec![&b"Nushell\xe6"[..], &b"\x9c\x80\xe9"[..], &b"\xab\x98!"[..]]; + let iter = test_chunks(chunks.clone(), ByteStreamType::String); + + let mut string = String::new(); + for value in iter { + let chunk_string = value.expect("error").into_string().expect("not a string"); + string.push_str(&chunk_string); + } + assert_eq!(real, string); + } + + #[test] + fn chunks_read_string_utf8_error() { + let chunks = vec![&b"Nushell\xe6"[..], &b"\x9c\x80\xe9"[..], &b"\xab"[..]]; + let iter = test_chunks(chunks, ByteStreamType::String); + + let mut string = String::new(); + for value in iter { + match value { + Ok(value) => string.push_str(&value.into_string().expect("not a string")), + Err(err) => { + println!("string so far: {:?}", string); + println!("got error: {err:?}"); + assert!(!string.is_empty()); + 
assert!(matches!(err, ShellError::NonUtf8Custom { .. })); + return; + } + } + } + panic!("no error"); + } + + #[test] + fn chunks_read_unknown_fallback() { + let chunks = vec![&b"Nushell"[..], &b"\x9c\x80\xe9abcd"[..], &b"efgh"[..]]; + let mut iter = test_chunks(chunks, ByteStreamType::Unknown); + + let mut get = || iter.next().expect("end of iter").expect("error"); + + assert_eq!(Value::test_string("Nushell"), get()); + assert_eq!(Value::test_binary(b"\x9c\x80\xe9abcd"), get()); + // Once it's in binary mode it won't go back + assert_eq!(Value::test_binary(b"efgh"), get()); } } diff --git a/crates/nu-protocol/src/pipeline/pipeline_data.rs b/crates/nu-protocol/src/pipeline/pipeline_data.rs index 7faa4ed221..0a13ffa4b3 100644 --- a/crates/nu-protocol/src/pipeline/pipeline_data.rs +++ b/crates/nu-protocol/src/pipeline/pipeline_data.rs @@ -2,8 +2,8 @@ use crate::{ ast::{Call, PathMember}, engine::{EngineState, Stack}, process::{ChildPipe, ChildProcess, ExitStatus}, - ByteStream, Config, ErrSpan, ListStream, OutDest, PipelineMetadata, Range, ShellError, Span, - Value, + ByteStream, ByteStreamType, Config, ErrSpan, ListStream, OutDest, PipelineMetadata, Range, + ShellError, Span, Value, }; use nu_utils::{stderr_write_all_and_flush, stdout_write_all_and_flush}; use std::{ @@ -170,6 +170,8 @@ impl PipelineData { /// Try convert from self into iterator /// /// It returns Err if the `self` cannot be converted to an iterator. + /// + /// The `span` should be the span of the command or operation that would raise an error. pub fn into_iter_strict(self, span: Span) -> Result { Ok(PipelineIterator(match self { PipelineData::Value(value, ..) => { @@ -274,7 +276,7 @@ impl PipelineData { span: head, }), PipelineData::ByteStream(stream, ..) => Err(ShellError::IncompatiblePathAccess { - type_name: "byte stream".to_string(), + type_name: stream.type_().describe().to_owned(), span: stream.span(), }), } @@ -313,16 +315,7 @@ impl PipelineData { Ok(PipelineData::ListStream(stream.map(f), metadata)) } PipelineData::ByteStream(stream, metadata) => { - // TODO: is this behavior desired / correct ? - let span = stream.span(); - let value = match String::from_utf8(stream.into_bytes()?) { - Ok(mut str) => { - str.truncate(str.trim_end_matches(LINE_ENDING_PATTERN).len()); - f(Value::string(str, span)) - } - Err(err) => f(Value::binary(err.into_bytes(), span)), - }; - Ok(value.into_pipeline_data_with_metadata(metadata)) + Ok(f(stream.into_value()?).into_pipeline_data_with_metadata(metadata)) } } } @@ -543,22 +536,26 @@ impl PipelineData { no_newline: bool, to_stderr: bool, ) -> Result, ShellError> { - if let PipelineData::ByteStream(stream, ..) = self { - stream.print(to_stderr) - } else { - // If the table function is in the declarations, then we can use it - // to create the table value that will be printed in the terminal - if let Some(decl_id) = engine_state.table_decl_id { - let command = engine_state.get_decl(decl_id); - if command.block_id().is_some() { - self.write_all_and_flush(engine_state, no_newline, to_stderr) + match self { + // Print byte streams directly as long as they aren't binary. + PipelineData::ByteStream(stream, ..) 
if stream.type_() != ByteStreamType::Binary => { + stream.print(to_stderr) + } + _ => { + // If the table function is in the declarations, then we can use it + // to create the table value that will be printed in the terminal + if let Some(decl_id) = engine_state.table_decl_id { + let command = engine_state.get_decl(decl_id); + if command.block_id().is_some() { + self.write_all_and_flush(engine_state, no_newline, to_stderr) + } else { + let call = Call::new(Span::new(0, 0)); + let table = command.run(engine_state, stack, &call, self)?; + table.write_all_and_flush(engine_state, no_newline, to_stderr) + } } else { - let call = Call::new(Span::new(0, 0)); - let table = command.run(engine_state, stack, &call, self)?; - table.write_all_and_flush(engine_state, no_newline, to_stderr) + self.write_all_and_flush(engine_state, no_newline, to_stderr) } - } else { - self.write_all_and_flush(engine_state, no_newline, to_stderr) } } } diff --git a/crates/nu_plugin_example/src/commands/collect_bytes.rs b/crates/nu_plugin_example/src/commands/collect_bytes.rs index 51ca1d4222..398a1de4b1 100644 --- a/crates/nu_plugin_example/src/commands/collect_bytes.rs +++ b/crates/nu_plugin_example/src/commands/collect_bytes.rs @@ -1,6 +1,7 @@ use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand}; use nu_protocol::{ - ByteStream, Category, Example, LabeledError, PipelineData, Signature, Type, Value, + ByteStream, ByteStreamType, Category, Example, LabeledError, PipelineData, Signature, Type, + Value, }; use crate::ExamplePlugin; @@ -52,6 +53,7 @@ impl PluginCommand for CollectBytes { input.into_iter().map(Value::coerce_into_binary), call.head, None, + ByteStreamType::Unknown, ), None, )) From c98960d0536dc0849d538e24cd52038b4c26685b Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Mon, 20 May 2024 13:10:36 +0000 Subject: [PATCH 055/137] Take owned `Read` and `Write` (#12909) # Description As @YizhePKU pointed out, the [Rust API guidelines](https://rust-lang.github.io/api-guidelines/interoperability.html#generic-readerwriter-functions-take-r-read-and-w-write-by-value-c-rw-value) recommend that generic functions take readers and writers by value and not by reference. This PR changes `copy_with_interupt` and few other places to take owned `Read` and `Write` instead of mutable references. 
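
For readers unfamiliar with that guideline: the standard library provides blanket impls `impl<R: Read + ?Sized> Read for &mut R` and `impl<W: Write + ?Sized> Write for &mut W`, so a function that takes `impl Read`/`impl Write` by value still accepts `&mut` references at the call site. A minimal, self-contained sketch of the pattern (the `copy_all` name and the `Cursor`/`sink` usage are illustrative only, not code from this PR):

```rust
use std::io::{self, Read, Write};

// Generic copy helper that takes the reader and writer by value, following the
// Rust API guidelines. Owned values and `&mut` references both satisfy the
// bounds thanks to the blanket impls mentioned above.
fn copy_all(mut reader: impl Read, mut writer: impl Write) -> io::Result<u64> {
    io::copy(&mut reader, &mut writer)
}

fn main() -> io::Result<()> {
    let data = b"hello".to_vec();
    let mut out = Vec::new();

    // Hand over ownership of the reader, write into a borrowed Vec...
    copy_all(io::Cursor::new(data.clone()), &mut out)?;

    // ...or keep the reader around and pass a mutable reference instead.
    let mut reader = io::Cursor::new(data);
    copy_all(&mut reader, io::sink())?;

    assert_eq!(out, b"hello".to_vec());
    Ok(())
}
```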
--- crates/nu-command/src/filesystem/save.rs | 4 +- crates/nu-command/src/filters/tee.rs | 10 +-- .../nu-protocol/src/pipeline/byte_stream.rs | 68 ++++++++----------- 3 files changed, 36 insertions(+), 46 deletions(-) diff --git a/crates/nu-command/src/filesystem/save.rs b/crates/nu-command/src/filesystem/save.rs index 340ceb4f62..ab5257fb01 100644 --- a/crates/nu-command/src/filesystem/save.rs +++ b/crates/nu-command/src/filesystem/save.rs @@ -508,7 +508,7 @@ fn get_files( } fn stream_to_file( - mut source: impl Read, + source: impl Read, known_size: Option, ctrlc: Option>, mut file: File, @@ -555,7 +555,7 @@ fn stream_to_file( Ok(()) } } else { - copy_with_interrupt(&mut source, &mut file, span, ctrlc.as_deref())?; + copy_with_interrupt(source, file, span, ctrlc.as_deref())?; Ok(()) } } diff --git a/crates/nu-command/src/filters/tee.rs b/crates/nu-command/src/filters/tee.rs index d6decd3bc6..2251f6646a 100644 --- a/crates/nu-command/src/filters/tee.rs +++ b/crates/nu-command/src/filters/tee.rs @@ -403,8 +403,8 @@ struct StreamInfo { metadata: Option, } -fn copy(mut src: impl Read, mut dest: impl Write, info: &StreamInfo) -> Result<(), ShellError> { - copy_with_interrupt(&mut src, &mut dest, info.span, info.ctrlc.as_deref())?; +fn copy(src: impl Read, dest: impl Write, info: &StreamInfo) -> Result<(), ShellError> { + copy_with_interrupt(src, dest, info.span, info.ctrlc.as_deref())?; Ok(()) } @@ -416,8 +416,8 @@ fn copy_pipe(pipe: ChildPipe, dest: impl Write, info: &StreamInfo) -> Result<(), } fn copy_on_thread( - mut src: impl Read + Send + 'static, - mut dest: impl Write + Send + 'static, + src: impl Read + Send + 'static, + dest: impl Write + Send + 'static, info: &StreamInfo, ) -> Result>, ShellError> { let span = info.span; @@ -425,7 +425,7 @@ fn copy_on_thread( thread::Builder::new() .name("stderr copier".into()) .spawn(move || { - copy_with_interrupt(&mut src, &mut dest, span, ctrlc.as_deref())?; + copy_with_interrupt(src, dest, span, ctrlc.as_deref())?; Ok(()) }) .map_err(|e| e.into_spanned(span).into()) diff --git a/crates/nu-protocol/src/pipeline/byte_stream.rs b/crates/nu-protocol/src/pipeline/byte_stream.rs index e77c2cc855..35ce39ed31 100644 --- a/crates/nu-protocol/src/pipeline/byte_stream.rs +++ b/crates/nu-protocol/src/pipeline/byte_stream.rs @@ -541,8 +541,8 @@ impl ByteStream { /// then the [`ExitStatus`] of the [`ChildProcess`] is returned. pub fn drain(self) -> Result, ShellError> { match self.stream { - ByteStreamSource::Read(mut read) => { - copy_with_interrupt(&mut read, &mut io::sink(), self.span, self.ctrlc.as_deref())?; + ByteStreamSource::Read(read) => { + copy_with_interrupt(read, io::sink(), self.span, self.ctrlc.as_deref())?; Ok(None) } ByteStreamSource::File(_) => Ok(None), @@ -566,16 +566,16 @@ impl ByteStream { /// /// If the source of the [`ByteStream`] is [`ByteStreamSource::Child`], /// then the [`ExitStatus`] of the [`ChildProcess`] is returned. 
- pub fn write_to(self, dest: &mut impl Write) -> Result, ShellError> { + pub fn write_to(self, dest: impl Write) -> Result, ShellError> { let span = self.span; let ctrlc = self.ctrlc.as_deref(); match self.stream { - ByteStreamSource::Read(mut read) => { - copy_with_interrupt(&mut read, dest, span, ctrlc)?; + ByteStreamSource::Read(read) => { + copy_with_interrupt(read, dest, span, ctrlc)?; Ok(None) } - ByteStreamSource::File(mut file) => { - copy_with_interrupt(&mut file, dest, span, ctrlc)?; + ByteStreamSource::File(file) => { + copy_with_interrupt(file, dest, span, ctrlc)?; Ok(None) } ByteStreamSource::Child(mut child) => { @@ -586,11 +586,11 @@ impl ByteStream { if let Some(stdout) = child.stdout.take() { match stdout { - ChildPipe::Pipe(mut pipe) => { - copy_with_interrupt(&mut pipe, dest, span, ctrlc)?; + ChildPipe::Pipe(pipe) => { + copy_with_interrupt(pipe, dest, span, ctrlc)?; } - ChildPipe::Tee(mut tee) => { - copy_with_interrupt(&mut tee, dest, span, ctrlc)?; + ChildPipe::Tee(tee) => { + copy_with_interrupt(tee, dest, span, ctrlc)?; } } } @@ -612,14 +612,14 @@ impl ByteStream { write_to_out_dest(read, stdout, true, span, ctrlc)?; Ok(None) } - ByteStreamSource::File(mut file) => { + ByteStreamSource::File(file) => { match stdout { OutDest::Pipe | OutDest::Capture | OutDest::Null => {} OutDest::Inherit => { - copy_with_interrupt(&mut file, &mut io::stdout(), span, ctrlc)?; + copy_with_interrupt(file, io::stdout(), span, ctrlc)?; } OutDest::File(f) => { - copy_with_interrupt(&mut file, &mut f.as_ref(), span, ctrlc)?; + copy_with_interrupt(file, f.as_ref(), span, ctrlc)?; } } Ok(None) @@ -974,7 +974,7 @@ fn trim_end_newline(string: &mut String) { } fn write_to_out_dest( - mut read: impl Read, + read: impl Read, stream: &OutDest, stdout: bool, span: Span, @@ -982,12 +982,10 @@ fn write_to_out_dest( ) -> Result<(), ShellError> { match stream { OutDest::Pipe | OutDest::Capture => return Ok(()), - OutDest::Null => copy_with_interrupt(&mut read, &mut io::sink(), span, ctrlc), - OutDest::Inherit if stdout => { - copy_with_interrupt(&mut read, &mut io::stdout(), span, ctrlc) - } - OutDest::Inherit => copy_with_interrupt(&mut read, &mut io::stderr(), span, ctrlc), - OutDest::File(file) => copy_with_interrupt(&mut read, &mut file.as_ref(), span, ctrlc), + OutDest::Null => copy_with_interrupt(read, io::sink(), span, ctrlc), + OutDest::Inherit if stdout => copy_with_interrupt(read, io::stdout(), span, ctrlc), + OutDest::Inherit => copy_with_interrupt(read, io::stderr(), span, ctrlc), + OutDest::File(file) => copy_with_interrupt(read, file.as_ref(), span, ctrlc), }?; Ok(()) } @@ -1004,22 +1002,18 @@ pub(crate) fn convert_file>(file: impl Into) - const DEFAULT_BUF_SIZE: usize = 8192; -pub fn copy_with_interrupt( - reader: &mut R, - writer: &mut W, +pub fn copy_with_interrupt( + mut reader: impl Read, + mut writer: impl Write, span: Span, interrupt: Option<&AtomicBool>, -) -> Result -where - R: Read, - W: Write, -{ +) -> Result { if let Some(interrupt) = interrupt { // #[cfg(any(target_os = "linux", target_os = "android"))] // { // return crate::sys::kernel_copy::copy_spec(reader, writer); // } - match generic_copy(reader, writer, span, interrupt) { + match generic_copy(&mut reader, &mut writer, span, interrupt) { Ok(len) => { writer.flush().err_span(span)?; Ok(len) @@ -1030,7 +1024,7 @@ where } } } else { - match io::copy(reader, writer) { + match io::copy(&mut reader, &mut writer) { Ok(n) => { writer.flush().err_span(span)?; Ok(n) @@ -1044,16 +1038,12 @@ where } // Copied from 
[`std::io::copy`] -fn generic_copy( - reader: &mut R, - writer: &mut W, +fn generic_copy( + mut reader: impl Read, + mut writer: impl Write, span: Span, interrupt: &AtomicBool, -) -> Result -where - R: Read, - W: Write, -{ +) -> Result { let buf = &mut [0; DEFAULT_BUF_SIZE]; let mut len = 0; loop { From 4f69ba172e141a23ed7d9a27a65bbed7b2f91b0d Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Mon, 20 May 2024 10:08:03 -0500 Subject: [PATCH 056/137] add `math min` and `math max` to `bench` command (#12913) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description This PR adds min and max to the bench command. ```nushell ❯ use std bench ❯ bench { dply -c 'parquet("./data.parquet") | group_by(year) | summarize(count = n(), sum = sum(geo_count)) | show()' | complete | null } --rounds 100 --verbose 100 / 100 ╭───────┬───────────────────╮ │ mean │ 71ms 358µs 850ns │ │ min │ 66ms 457µs 583ns │ │ max │ 120ms 338µs 167ns │ │ std │ 6ms 553µs 949ns │ │ times │ [list 100 items] │ ╰───────┴───────────────────╯ ``` # User-Facing Changes # Tests + Formatting # After Submitting --- crates/nu-std/std/mod.nu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/nu-std/std/mod.nu b/crates/nu-std/std/mod.nu index 9d4387fa95..a20bdb4b78 100644 --- a/crates/nu-std/std/mod.nu +++ b/crates/nu-std/std/mod.nu @@ -148,6 +148,8 @@ export def bench [ let report = { mean: ($times | math avg | from ns) + min: ($times | math min | from ns) + max: ($times | math max | from ns) std: ($times | math stddev | from ns) times: ($times | each { from ns }) } From 905e3d0715ad82fbf837e059ad7cd5184ea77f89 Mon Sep 17 00:00:00 2001 From: Ian Manske Date: Mon, 20 May 2024 17:22:08 +0000 Subject: [PATCH 057/137] Remove dataframes crate and feature (#12889) # Description Removes the old `nu-cmd-dataframe` crate in favor of the polars plugin. As such, this PR also removes the `dataframe` feature, related CI, and full releases of nushell. 
--- .github/workflows/ci.yml | 42 +- .github/workflows/nightly-build.yml | 131 +- .github/workflows/release-pkg.nu | 75 +- .github/workflows/release.yml | 105 +- CONTRIBUTING.md | 12 +- Cargo.lock | 24 - Cargo.toml | 7 - crates/nu-cmd-dataframe/Cargo.toml | 75 - crates/nu-cmd-dataframe/LICENSE | 21 - .../nu-cmd-dataframe/src/dataframe/README.md | 12 - .../src/dataframe/eager/append.rs | 134 -- .../src/dataframe/eager/cast.rs | 195 --- .../src/dataframe/eager/columns.rs | 73 - .../src/dataframe/eager/drop.rs | 115 -- .../src/dataframe/eager/drop_duplicates.rs | 119 -- .../src/dataframe/eager/drop_nulls.rs | 137 -- .../src/dataframe/eager/dtypes.rs | 104 -- .../src/dataframe/eager/dummies.rs | 107 -- .../src/dataframe/eager/filter_with.rs | 154 -- .../src/dataframe/eager/first.rs | 144 -- .../src/dataframe/eager/get.rs | 87 - .../src/dataframe/eager/last.rs | 118 -- .../src/dataframe/eager/list.rs | 68 - .../src/dataframe/eager/melt.rs | 248 --- .../src/dataframe/eager/mod.rs | 114 -- .../src/dataframe/eager/open.rs | 518 ------ .../src/dataframe/eager/query_df.rs | 104 -- .../src/dataframe/eager/rename.rs | 185 --- .../src/dataframe/eager/sample.rs | 127 -- .../src/dataframe/eager/schema.rs | 112 -- .../src/dataframe/eager/shape.rs | 82 - .../src/dataframe/eager/slice.rs | 84 - .../src/dataframe/eager/sql_context.rs | 228 --- .../src/dataframe/eager/sql_expr.rs | 200 --- .../src/dataframe/eager/summary.rs | 279 ---- .../src/dataframe/eager/take.rs | 148 -- .../src/dataframe/eager/to_arrow.rs | 79 - .../src/dataframe/eager/to_avro.rs | 109 -- .../src/dataframe/eager/to_csv.rs | 125 -- .../src/dataframe/eager/to_df.rs | 189 --- .../src/dataframe/eager/to_json_lines.rs | 80 - .../src/dataframe/eager/to_nu.rs | 136 -- .../src/dataframe/eager/to_parquet.rs | 79 - .../src/dataframe/eager/with_column.rs | 202 --- .../src/dataframe/expressions/alias.rs | 86 - .../src/dataframe/expressions/arg_where.rs | 78 - .../src/dataframe/expressions/col.rs | 68 - .../src/dataframe/expressions/concat_str.rs | 108 -- .../src/dataframe/expressions/datepart.rs | 170 -- .../expressions/expressions_macro.rs | 736 --------- .../src/dataframe/expressions/is_in.rs | 116 -- .../src/dataframe/expressions/lit.rs | 69 - .../src/dataframe/expressions/mod.rs | 62 - .../src/dataframe/expressions/otherwise.rs | 126 -- .../src/dataframe/expressions/quantile.rs | 101 -- .../src/dataframe/expressions/when.rs | 147 -- .../src/dataframe/lazy/aggregate.rs | 216 --- .../src/dataframe/lazy/collect.rs | 73 - .../src/dataframe/lazy/explode.rs | 153 -- .../src/dataframe/lazy/fetch.rs | 92 -- .../src/dataframe/lazy/fill_nan.rs | 143 -- .../src/dataframe/lazy/fill_null.rs | 93 -- .../src/dataframe/lazy/filter.rs | 83 - .../src/dataframe/lazy/flatten.rs | 126 -- .../src/dataframe/lazy/groupby.rs | 161 -- .../src/dataframe/lazy/join.rs | 252 --- .../src/dataframe/lazy/macro_commands.rs | 246 --- .../src/dataframe/lazy/mod.rs | 65 - .../src/dataframe/lazy/quantile.rs | 87 - .../src/dataframe/lazy/select.rs | 75 - .../src/dataframe/lazy/sort_by_expr.rs | 159 -- .../src/dataframe/lazy/to_lazy.rs | 53 - crates/nu-cmd-dataframe/src/dataframe/mod.rs | 36 - .../src/dataframe/series/all_false.rs | 108 -- .../src/dataframe/series/all_true.rs | 105 -- .../src/dataframe/series/arg_max.rs | 85 - .../src/dataframe/series/arg_min.rs | 85 - .../src/dataframe/series/cumulative.rs | 148 -- .../src/dataframe/series/date/as_date.rs | 94 -- .../src/dataframe/series/date/as_datetime.rs | 187 --- .../src/dataframe/series/date/get_day.rs | 90 -- 
.../src/dataframe/series/date/get_hour.rs | 90 -- .../src/dataframe/series/date/get_minute.rs | 90 -- .../src/dataframe/series/date/get_month.rs | 90 -- .../dataframe/series/date/get_nanosecond.rs | 90 -- .../src/dataframe/series/date/get_ordinal.rs | 90 -- .../src/dataframe/series/date/get_second.rs | 90 -- .../src/dataframe/series/date/get_week.rs | 90 -- .../src/dataframe/series/date/get_weekday.rs | 90 -- .../src/dataframe/series/date/get_year.rs | 90 -- .../src/dataframe/series/date/mod.rs | 25 - .../src/dataframe/series/indexes/arg_sort.rs | 130 -- .../src/dataframe/series/indexes/arg_true.rs | 115 -- .../dataframe/series/indexes/arg_unique.rs | 93 -- .../src/dataframe/series/indexes/mod.rs | 9 - .../dataframe/series/indexes/set_with_idx.rs | 213 --- .../dataframe/series/masks/is_duplicated.rs | 122 -- .../src/dataframe/series/masks/is_in.rs | 104 -- .../src/dataframe/series/masks/is_not_null.rs | 122 -- .../src/dataframe/series/masks/is_null.rs | 122 -- .../src/dataframe/series/masks/is_unique.rs | 121 -- .../src/dataframe/series/masks/mod.rs | 15 - .../src/dataframe/series/masks/not.rs | 93 -- .../src/dataframe/series/masks/set.rs | 201 --- .../src/dataframe/series/mod.rs | 95 -- .../src/dataframe/series/n_null.rs | 82 - .../src/dataframe/series/n_unique.rs | 127 -- .../src/dataframe/series/rolling.rs | 186 --- .../src/dataframe/series/shift.rs | 115 -- .../dataframe/series/string/concatenate.rs | 113 -- .../src/dataframe/series/string/contains.rs | 106 -- .../src/dataframe/series/string/mod.rs | 19 - .../src/dataframe/series/string/replace.rs | 120 -- .../dataframe/series/string/replace_all.rs | 121 -- .../dataframe/series/string/str_lengths.rs | 87 - .../src/dataframe/series/string/str_slice.rs | 136 -- .../src/dataframe/series/string/strftime.rs | 105 -- .../dataframe/series/string/to_lowercase.rs | 92 -- .../dataframe/series/string/to_uppercase.rs | 96 -- .../src/dataframe/series/unique.rs | 146 -- .../src/dataframe/series/value_counts.rs | 95 -- crates/nu-cmd-dataframe/src/dataframe/stub.rs | 34 - .../src/dataframe/test_dataframe.rs | 98 -- .../nu-cmd-dataframe/src/dataframe/utils.rs | 16 - .../src/dataframe/values/mod.rs | 14 - .../values/nu_dataframe/between_values.rs | 884 ---------- .../values/nu_dataframe/conversion.rs | 1435 ----------------- .../values/nu_dataframe/custom_value.rs | 79 - .../src/dataframe/values/nu_dataframe/mod.rs | 580 ------- .../values/nu_dataframe/operations.rs | 206 --- .../values/nu_expression/custom_value.rs | 147 -- .../src/dataframe/values/nu_expression/mod.rs | 443 ----- .../values/nu_lazyframe/custom_value.rs | 50 - .../src/dataframe/values/nu_lazyframe/mod.rs | 188 --- .../values/nu_lazygroupby/custom_value.rs | 44 - .../dataframe/values/nu_lazygroupby/mod.rs | 113 -- .../src/dataframe/values/nu_schema.rs | 376 ----- .../dataframe/values/nu_when/custom_value.rs | 41 - .../src/dataframe/values/nu_when/mod.rs | 77 - .../src/dataframe/values/utils.rs | 86 - crates/nu-cmd-dataframe/src/lib.rs | 4 - crates/nu-cmd-lang/Cargo.toml | 1 - crates/nu-cmd-lang/README.md | 1 - .../nu-cmd-lang/src/core_commands/version.rs | 5 - crates/nu-command/tests/commands/open.rs | 16 - crates/nu-command/tests/main.rs | 3 +- crates/nu-protocol/src/errors/shell_error.rs | 13 - crates/nu_plugin_polars/Cargo.toml | 2 +- devdocs/PLATFORM_SUPPORT.md | 8 +- scripts/build-all-maclin.sh | 4 +- scripts/build-all-windows.cmd | 4 +- scripts/build-all.nu | 4 +- scripts/install-all.ps1 | 4 +- scripts/install-all.sh | 4 +- src/main.rs | 2 - toolkit.nu | 29 +- 156 files changed, 
56 insertions(+), 19464 deletions(-) delete mode 100644 crates/nu-cmd-dataframe/Cargo.toml delete mode 100644 crates/nu-cmd-dataframe/LICENSE delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/README.md delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/append.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/columns.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/dummies.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/first.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/get.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/last.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/list.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/open.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/sql_context.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/take.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_arrow.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_avro.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_csv.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_json_lines.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/to_parquet.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/alias.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/col.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/lit.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs delete mode 100644 
crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/as_date.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/date/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/indexes/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs delete mode 100644 
crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/shift.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/unique.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/stub.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/utils.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/custom_value.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/operations.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/custom_value.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/custom_value.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/custom_value.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_when/custom_value.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/nu_when/mod.rs delete mode 100644 crates/nu-cmd-dataframe/src/dataframe/values/utils.rs delete mode 100644 crates/nu-cmd-dataframe/src/lib.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4815491854..3acdfa71d1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,17 
+29,6 @@ jobs: # instead of 14 GB) which is too little for us right now. Revisit when `dfr` commands are # removed and we're only building the `polars` plugin instead platform: [windows-latest, macos-13, ubuntu-20.04] - feature: [default, dataframe] - include: - - feature: default - flags: "" - - feature: dataframe - flags: "--features=dataframe" - exclude: - - platform: windows-latest - feature: dataframe - - platform: macos-13 - feature: dataframe runs-on: ${{ matrix.platform }} @@ -48,43 +37,31 @@ jobs: - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - with: - rustflags: "" - name: cargo fmt run: cargo fmt --all -- --check # If changing these settings also change toolkit.nu - name: Clippy - run: cargo clippy --workspace ${{ matrix.flags }} --exclude nu_plugin_* -- $CLIPPY_OPTIONS + run: cargo clippy --workspace --exclude nu_plugin_* -- $CLIPPY_OPTIONS # In tests we don't have to deny unwrap - name: Clippy of tests - run: cargo clippy --tests --workspace ${{ matrix.flags }} --exclude nu_plugin_* -- -D warnings + run: cargo clippy --tests --workspace --exclude nu_plugin_* -- -D warnings - name: Clippy of benchmarks - run: cargo clippy --benches --workspace ${{ matrix.flags }} --exclude nu_plugin_* -- -D warnings + run: cargo clippy --benches --workspace --exclude nu_plugin_* -- -D warnings tests: strategy: fail-fast: true matrix: platform: [windows-latest, macos-latest, ubuntu-20.04] - feature: [default, dataframe] include: - # linux CI cannot handle clipboard feature - default-flags: "" - - platform: ubuntu-20.04 + # linux CI cannot handle clipboard feature + - platform: ubuntu-20.04 default-flags: "--no-default-features --features=default-no-clipboard" - - feature: default - flags: "" - - feature: dataframe - flags: "--features=dataframe" - exclude: - - platform: windows-latest - feature: dataframe - - platform: macos-latest - feature: dataframe runs-on: ${{ matrix.platform }} @@ -93,12 +70,9 @@ jobs: - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - with: - rustflags: "" - name: Tests - run: cargo test --workspace --profile ci --exclude nu_plugin_* ${{ matrix.default-flags }} ${{ matrix.flags }} - + run: cargo test --workspace --profile ci --exclude nu_plugin_* ${{ matrix.default-flags }} - name: Check for clean repo shell: bash run: | @@ -125,8 +99,6 @@ jobs: - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - with: - rustflags: "" - name: Install Nushell run: cargo install --path . 
--locked --no-default-features @@ -178,8 +150,6 @@ jobs: - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - with: - rustflags: "" - name: Clippy run: cargo clippy --package nu_plugin_* -- $CLIPPY_OPTIONS diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index ab9f93d97d..418b392fce 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -84,41 +84,30 @@ jobs: include: - target: aarch64-apple-darwin os: macos-latest - target_rustflags: '' - target: x86_64-apple-darwin os: macos-latest - target_rustflags: '' - target: x86_64-pc-windows-msvc extra: 'bin' os: windows-latest - target_rustflags: '' - target: x86_64-pc-windows-msvc extra: msi os: windows-latest - target_rustflags: '' - target: aarch64-pc-windows-msvc extra: 'bin' os: windows-latest - target_rustflags: '' - target: aarch64-pc-windows-msvc extra: msi os: windows-latest - target_rustflags: '' - target: x86_64-unknown-linux-gnu os: ubuntu-20.04 - target_rustflags: '' - target: x86_64-unknown-linux-musl os: ubuntu-20.04 - target_rustflags: '' - target: aarch64-unknown-linux-gnu os: ubuntu-20.04 - target_rustflags: '' - target: armv7-unknown-linux-gnueabihf os: ubuntu-20.04 - target_rustflags: '' - target: riscv64gc-unknown-linux-gnu os: ubuntu-latest - target_rustflags: '' runs-on: ${{matrix.os}} @@ -134,7 +123,7 @@ jobs: - name: Setup Rust toolchain and cache uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - # WARN: Keep the rustflags to prevent from the winget submission error: `CAQuietExec: Error 0xc0000135` + # WARN: Keep the rustflags to prevent from the winget submission error: `CAQuietExec: Error 0xc0000135` with: rustflags: '' @@ -147,12 +136,10 @@ jobs: id: nu run: nu .github/workflows/release-pkg.nu env: - RELEASE_TYPE: standard OS: ${{ matrix.os }} REF: ${{ github.ref }} TARGET: ${{ matrix.target }} _EXTRA_: ${{ matrix.extra }} - TARGET_RUSTFLAGS: ${{ matrix.target_rustflags }} - name: Create an Issue for Release Failure if: ${{ failure() }} @@ -184,122 +171,6 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - full: - name: Full - needs: prepare - strategy: - fail-fast: false - matrix: - target: - - aarch64-apple-darwin - - x86_64-apple-darwin - - x86_64-pc-windows-msvc - - aarch64-pc-windows-msvc - - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl - - aarch64-unknown-linux-gnu - extra: ['bin'] - include: - - target: aarch64-apple-darwin - os: macos-latest - target_rustflags: '--features=dataframe' - - target: x86_64-apple-darwin - os: macos-latest - target_rustflags: '--features=dataframe' - - target: x86_64-pc-windows-msvc - extra: 'bin' - os: windows-latest - target_rustflags: '--features=dataframe' - - target: x86_64-pc-windows-msvc - extra: msi - os: windows-latest - target_rustflags: '--features=dataframe' - - target: aarch64-pc-windows-msvc - extra: 'bin' - os: windows-latest - target_rustflags: '--features=dataframe' - - target: aarch64-pc-windows-msvc - extra: msi - os: windows-latest - target_rustflags: '--features=dataframe' - - target: x86_64-unknown-linux-gnu - os: ubuntu-20.04 - target_rustflags: '--features=dataframe' - - target: x86_64-unknown-linux-musl - os: ubuntu-20.04 - target_rustflags: '--features=dataframe' - - target: aarch64-unknown-linux-gnu - os: ubuntu-20.04 - target_rustflags: '--features=dataframe' - - runs-on: ${{matrix.os}} - - steps: - - uses: actions/checkout@v4.1.5 - with: - ref: main - fetch-depth: 0 - - - name: Update Rust Toolchain Target - run: | - 
echo "targets = ['${{matrix.target}}']" >> rust-toolchain.toml - - - name: Setup Rust toolchain and cache - uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - # WARN: Keep the rustflags to prevent from the winget submission error: `CAQuietExec: Error 0xc0000135` - with: - rustflags: '' - - - name: Setup Nushell - uses: hustcer/setup-nu@v3.10 - with: - version: 0.93.0 - - - name: Release Nu Binary - id: nu - run: nu .github/workflows/release-pkg.nu - env: - RELEASE_TYPE: full - OS: ${{ matrix.os }} - REF: ${{ github.ref }} - TARGET: ${{ matrix.target }} - _EXTRA_: ${{ matrix.extra }} - TARGET_RUSTFLAGS: ${{ matrix.target_rustflags }} - - - name: Create an Issue for Release Failure - if: ${{ failure() }} - uses: JasonEtco/create-an-issue@v2.9.2 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - update_existing: true - search_existing: open - filename: .github/AUTO_ISSUE_TEMPLATE/nightly-build-fail.md - - - name: Set Outputs of Short SHA - id: vars - run: | - echo "date=$(date -u +'%Y-%m-%d')" >> $GITHUB_OUTPUT - sha_short=$(git rev-parse --short HEAD) - echo "sha_short=${sha_short:0:7}" >> $GITHUB_OUTPUT - - # REF: https://github.com/marketplace/actions/gh-release - # Create a release only in nushell/nightly repo - - name: Publish Archive - uses: softprops/action-gh-release@v2.0.5 - if: ${{ startsWith(github.repository, 'nushell/nightly') }} - with: - draft: false - prerelease: true - name: Nu-nightly-${{ steps.vars.outputs.date }}-${{ steps.vars.outputs.sha_short }} - tag_name: nightly-${{ steps.vars.outputs.sha_short }} - body: | - This is a NIGHTLY build of Nushell. - It is NOT recommended for production use. - files: ${{ steps.nu.outputs.archive }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - cleanup: name: Cleanup # Should only run in nushell/nightly repo diff --git a/.github/workflows/release-pkg.nu b/.github/workflows/release-pkg.nu index 046ea2475e..da2779f6d1 100755 --- a/.github/workflows/release-pkg.nu +++ b/.github/workflows/release-pkg.nu @@ -9,7 +9,6 @@ # Instructions for manually creating an MSI for Winget Releases when they fail # Added 2022-11-29 when Windows packaging wouldn't work # Updated again on 2023-02-23 because msis are still failing validation -# Update on 2023-10-18 to use RELEASE_TYPE env var to determine if full or not # To run this manual for windows here are the steps I take # checkout the release you want to publish # 1. git checkout 0.86.0 @@ -17,28 +16,26 @@ # 2. $env:CARGO_TARGET_DIR = "" # 2. hide-env CARGO_TARGET_DIR # 3. $env.TARGET = 'x86_64-pc-windows-msvc' -# 4. $env.TARGET_RUSTFLAGS = '' -# 5. $env.GITHUB_WORKSPACE = 'D:\nushell' -# 6. $env.GITHUB_OUTPUT = 'D:\nushell\output\out.txt' -# 7. $env.OS = 'windows-latest' -# 8. $env.RELEASE_TYPE = '' # There is full and '' for normal releases +# 4. $env.GITHUB_WORKSPACE = 'D:\nushell' +# 5. $env.GITHUB_OUTPUT = 'D:\nushell\output\out.txt' +# 6. $env.OS = 'windows-latest' # make sure 7z.exe is in your path https://www.7-zip.org/download.html -# 9. $env.Path = ($env.Path | append 'c:\apps\7-zip') +# 7. $env.Path = ($env.Path | append 'c:\apps\7-zip') # make sure aria2c.exe is in your path https://github.com/aria2/aria2 -# 10. $env.Path = ($env.Path | append 'c:\path\to\aria2c') +# 8. $env.Path = ($env.Path | append 'c:\path\to\aria2c') # make sure you have the wixtools installed https://wixtoolset.org/ -# 11. $env.Path = ($env.Path | append 'C:\Users\dschroeder\AppData\Local\tauri\WixTools') +# 9. 
$env.Path = ($env.Path | append 'C:\Users\dschroeder\AppData\Local\tauri\WixTools') # You need to run the release-pkg twice. The first pass, with _EXTRA_ as 'bin', makes the output # folder and builds everything. The second pass, that generates the msi file, with _EXTRA_ as 'msi' -# 12. $env._EXTRA_ = 'bin' -# 13. source .github\workflows\release-pkg.nu -# 14. cd .. -# 15. $env._EXTRA_ = 'msi' -# 16. source .github\workflows\release-pkg.nu +# 10. $env._EXTRA_ = 'bin' +# 11. source .github\workflows\release-pkg.nu +# 12. cd .. +# 13. $env._EXTRA_ = 'msi' +# 14. source .github\workflows\release-pkg.nu # After msi is generated, you have to update winget-pkgs repo, you'll need to patch the release # by deleting the existing msi and uploading this new msi. Then you'll need to update the hash # on the winget-pkgs PR. To generate the hash, run this command -# 17. open target\wix\nu-0.74.0-x86_64-pc-windows-msvc.msi | hash sha256 +# 15. open target\wix\nu-0.74.0-x86_64-pc-windows-msvc.msi | hash sha256 # Then, just take the output and put it in the winget-pkgs PR for the hash on the msi @@ -48,31 +45,15 @@ let os = $env.OS let target = $env.TARGET # Repo source dir like `/home/runner/work/nushell/nushell` let src = $env.GITHUB_WORKSPACE -let flags = $env.TARGET_RUSTFLAGS let dist = $'($env.GITHUB_WORKSPACE)/output' let version = (open Cargo.toml | get package.version) print $'Debugging info:' -print { version: $version, bin: $bin, os: $os, releaseType: $env.RELEASE_TYPE, target: $target, src: $src, flags: $flags, dist: $dist }; hr-line -b - -# Rename the full release name so that we won't break the existing scripts for standard release downloading, such as: -# curl -s https://api.github.com/repos/chmln/sd/releases/latest | grep browser_download_url | cut -d '"' -f 4 | grep x86_64-unknown-linux-musl -const FULL_RLS_NAMING = { - x86_64-apple-darwin: 'x86_64-darwin-full', - aarch64-apple-darwin: 'aarch64-darwin-full', - x86_64-unknown-linux-gnu: 'x86_64-linux-gnu-full', - x86_64-pc-windows-msvc: 'x86_64-windows-msvc-full', - x86_64-unknown-linux-musl: 'x86_64-linux-musl-full', - aarch64-unknown-linux-gnu: 'aarch64-linux-gnu-full', - aarch64-pc-windows-msvc: 'aarch64-windows-msvc-full', - riscv64gc-unknown-linux-gnu: 'riscv64-linux-gnu-full', - armv7-unknown-linux-gnueabihf: 'armv7-linux-gnueabihf-full', -} +print { version: $version, bin: $bin, os: $os, target: $target, src: $src, dist: $dist }; hr-line -b # $env let USE_UBUNTU = $os starts-with ubuntu -let FULL_NAME = $FULL_RLS_NAMING | get -i $target | default 'unknown-target-full' print $'(char nl)Packaging ($bin) v($version) for ($target) in ($src)...'; hr-line -b if not ('Cargo.lock' | path exists) { cargo generate-lockfile } @@ -91,23 +72,23 @@ if $os in ['macos-latest'] or $USE_UBUNTU { 'aarch64-unknown-linux-gnu' => { sudo apt-get install gcc-aarch64-linux-gnu -y $env.CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER = 'aarch64-linux-gnu-gcc' - cargo-build-nu $flags + cargo-build-nu } 'riscv64gc-unknown-linux-gnu' => { sudo apt-get install gcc-riscv64-linux-gnu -y $env.CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER = 'riscv64-linux-gnu-gcc' - cargo-build-nu $flags + cargo-build-nu } 'armv7-unknown-linux-gnueabihf' => { sudo apt-get install pkg-config gcc-arm-linux-gnueabihf -y $env.CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER = 'arm-linux-gnueabihf-gcc' - cargo-build-nu $flags + cargo-build-nu } _ => { # musl-tools to fix 'Failed to find tool. Is `musl-gcc` installed?' 
# Actually just for x86_64-unknown-linux-musl target if $USE_UBUNTU { sudo apt install musl-tools -y } - cargo-build-nu $flags + cargo-build-nu } } } @@ -116,7 +97,7 @@ if $os in ['macos-latest'] or $USE_UBUNTU { # Build for Windows without static-link-openssl feature # ---------------------------------------------------------------------------- if $os in ['windows-latest'] { - cargo-build-nu $flags + cargo-build-nu } # ---------------------------------------------------------------------------- @@ -162,7 +143,7 @@ cd $dist; print $'(char nl)Creating release archive...'; hr-line if $os in ['macos-latest'] or $USE_UBUNTU { let files = (ls | get name) - let dest = if $env.RELEASE_TYPE == 'full' { $'($bin)-($version)-($FULL_NAME)' } else { $'($bin)-($version)-($target)' } + let dest = $'($bin)-($version)-($target)' let archive = $'($dist)/($dest).tar.gz' mkdir $dest @@ -177,7 +158,7 @@ if $os in ['macos-latest'] or $USE_UBUNTU { } else if $os == 'windows-latest' { - let releaseStem = if $env.RELEASE_TYPE == 'full' { $'($bin)-($version)-($FULL_NAME)' } else { $'($bin)-($version)-($target)' } + let releaseStem = $'($bin)-($version)-($target)' print $'(char nl)Download less related stuffs...'; hr-line aria2c https://github.com/jftuga/less-Windows/releases/download/less-v608/less.exe -o less.exe @@ -214,19 +195,11 @@ if $os in ['macos-latest'] or $USE_UBUNTU { } } -def 'cargo-build-nu' [ options: string ] { - if ($options | str trim | is-empty) { - if $os == 'windows-latest' { - cargo build --release --all --target $target - } else { - cargo build --release --all --target $target --features=static-link-openssl - } +def 'cargo-build-nu' [] { + if $os == 'windows-latest' { + cargo build --release --all --target $target } else { - if $os == 'windows-latest' { - cargo build --release --all --target $target $options - } else { - cargo build --release --all --target $target --features=static-link-openssl $options - } + cargo build --release --all --target $target --features=static-link-openssl } } diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ffe653bd22..fb1b384d54 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,41 +34,30 @@ jobs: include: - target: aarch64-apple-darwin os: macos-latest - target_rustflags: '' - target: x86_64-apple-darwin os: macos-latest - target_rustflags: '' - target: x86_64-pc-windows-msvc extra: 'bin' os: windows-latest - target_rustflags: '' - target: x86_64-pc-windows-msvc extra: msi os: windows-latest - target_rustflags: '' - target: aarch64-pc-windows-msvc extra: 'bin' os: windows-latest - target_rustflags: '' - target: aarch64-pc-windows-msvc extra: msi os: windows-latest - target_rustflags: '' - target: x86_64-unknown-linux-gnu os: ubuntu-20.04 - target_rustflags: '' - target: x86_64-unknown-linux-musl os: ubuntu-20.04 - target_rustflags: '' - target: aarch64-unknown-linux-gnu os: ubuntu-20.04 - target_rustflags: '' - target: armv7-unknown-linux-gnueabihf os: ubuntu-20.04 - target_rustflags: '' - target: riscv64gc-unknown-linux-gnu os: ubuntu-latest - target_rustflags: '' runs-on: ${{matrix.os}} @@ -81,7 +70,7 @@ jobs: - name: Setup Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - # WARN: Keep the rustflags to prevent from the winget submission error: `CAQuietExec: Error 0xc0000135` + # WARN: Keep the rustflags to prevent from the winget submission error: `CAQuietExec: Error 0xc0000135` with: cache: false rustflags: '' @@ -95,102 +84,10 @@ jobs: id: nu run: nu 
.github/workflows/release-pkg.nu env: - RELEASE_TYPE: standard OS: ${{ matrix.os }} REF: ${{ github.ref }} TARGET: ${{ matrix.target }} _EXTRA_: ${{ matrix.extra }} - TARGET_RUSTFLAGS: ${{ matrix.target_rustflags }} - - # REF: https://github.com/marketplace/actions/gh-release - - name: Publish Archive - uses: softprops/action-gh-release@v2.0.5 - if: ${{ startsWith(github.ref, 'refs/tags/') }} - with: - draft: true - files: ${{ steps.nu.outputs.archive }} - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - full: - name: Full - - strategy: - fail-fast: false - matrix: - target: - - aarch64-apple-darwin - - x86_64-apple-darwin - - x86_64-pc-windows-msvc - - aarch64-pc-windows-msvc - - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl - - aarch64-unknown-linux-gnu - extra: ['bin'] - include: - - target: aarch64-apple-darwin - os: macos-latest - target_rustflags: '--features=dataframe' - - target: x86_64-apple-darwin - os: macos-latest - target_rustflags: '--features=dataframe' - - target: x86_64-pc-windows-msvc - extra: 'bin' - os: windows-latest - target_rustflags: '--features=dataframe' - - target: x86_64-pc-windows-msvc - extra: msi - os: windows-latest - target_rustflags: '--features=dataframe' - - target: aarch64-pc-windows-msvc - extra: 'bin' - os: windows-latest - target_rustflags: '--features=dataframe' - - target: aarch64-pc-windows-msvc - extra: msi - os: windows-latest - target_rustflags: '--features=dataframe' - - target: x86_64-unknown-linux-gnu - os: ubuntu-20.04 - target_rustflags: '--features=dataframe' - - target: x86_64-unknown-linux-musl - os: ubuntu-20.04 - target_rustflags: '--features=dataframe' - - target: aarch64-unknown-linux-gnu - os: ubuntu-20.04 - target_rustflags: '--features=dataframe' - - runs-on: ${{matrix.os}} - - steps: - - uses: actions/checkout@v4.1.5 - - - name: Update Rust Toolchain Target - run: | - echo "targets = ['${{matrix.target}}']" >> rust-toolchain.toml - - - name: Setup Rust toolchain - uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 - # WARN: Keep the rustflags to prevent from the winget submission error: `CAQuietExec: Error 0xc0000135` - with: - cache: false - rustflags: '' - - - name: Setup Nushell - uses: hustcer/setup-nu@v3.10 - with: - version: 0.93.0 - - - name: Release Nu Binary - id: nu - run: nu .github/workflows/release-pkg.nu - env: - RELEASE_TYPE: full - OS: ${{ matrix.os }} - REF: ${{ github.ref }} - TARGET: ${{ matrix.target }} - _EXTRA_: ${{ matrix.extra }} - TARGET_RUSTFLAGS: ${{ matrix.target_rustflags }} # REF: https://github.com/marketplace/actions/gh-release - name: Publish Archive diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d5cf758b56..6d7c256d4c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,11 +71,6 @@ Read cargo's documentation for more details: https://doc.rust-lang.org/cargo/ref cargo run ``` -- Build and run with dataframe support. - ```nushell - cargo run --features=dataframe - ``` - - Run Clippy on Nushell: ```nushell @@ -93,11 +88,6 @@ Read cargo's documentation for more details: https://doc.rust-lang.org/cargo/ref cargo test --workspace ``` - along with dataframe tests - - ```nushell - cargo test --workspace --features=dataframe - ``` or via the `toolkit.nu` command: ```nushell use toolkit.nu test @@ -240,7 +230,7 @@ You can help us to make the review process a smooth experience: - Choose what simplifies having confidence in the conflict resolution and the review. **Merge commits in your branch are OK** in the squash model. 
- Feel free to notify your reviewers or affected PR authors if your change might cause larger conflicts with another change. - During the rollup of multiple PRs, we may choose to resolve merge conflicts and CI failures ourselves. (Allow maintainers to push to your branch to enable us to do this quickly.) - + ## License We use the [MIT License](https://github.com/nushell/nushell/blob/main/LICENSE) in all of our Nushell projects. If you are including or referencing a crate that uses the [GPL License](https://www.gnu.org/licenses/gpl-3.0.en.html#license-text) unfortunately we will not be able to accept your PR. diff --git a/Cargo.lock b/Cargo.lock index ad5a1e3a76..fb792cbd2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2782,7 +2782,6 @@ dependencies = [ "nix", "nu-cli", "nu-cmd-base", - "nu-cmd-dataframe", "nu-cmd-extra", "nu-cmd-lang", "nu-cmd-plugin", @@ -2870,29 +2869,6 @@ dependencies = [ "nu-protocol", ] -[[package]] -name = "nu-cmd-dataframe" -version = "0.93.1" -dependencies = [ - "chrono", - "chrono-tz 0.8.6", - "fancy-regex", - "indexmap", - "nu-cmd-lang", - "nu-engine", - "nu-parser", - "nu-protocol", - "num", - "polars", - "polars-arrow", - "polars-io", - "polars-ops", - "polars-plan", - "polars-utils", - "serde", - "sqlparser 0.45.0", -] - [[package]] name = "nu-cmd-extra" version = "0.93.1" diff --git a/Cargo.toml b/Cargo.toml index 2e9c7e0b0f..afc31383f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,6 @@ members = [ "crates/nu-cmd-extra", "crates/nu-cmd-lang", "crates/nu-cmd-plugin", - "crates/nu-cmd-dataframe", "crates/nu-command", "crates/nu-color-config", "crates/nu-explore", @@ -179,9 +178,6 @@ nu-cli = { path = "./crates/nu-cli", version = "0.93.1" } nu-cmd-base = { path = "./crates/nu-cmd-base", version = "0.93.1" } nu-cmd-lang = { path = "./crates/nu-cmd-lang", version = "0.93.1" } nu-cmd-plugin = { path = "./crates/nu-cmd-plugin", version = "0.93.1", optional = true } -nu-cmd-dataframe = { path = "./crates/nu-cmd-dataframe", version = "0.93.1", features = [ - "dataframe", -], optional = true } nu-cmd-extra = { path = "./crates/nu-cmd-extra", version = "0.93.1" } nu-command = { path = "./crates/nu-command", version = "0.93.1" } nu-engine = { path = "./crates/nu-engine", version = "0.93.1" } @@ -271,9 +267,6 @@ system-clipboard = [ which-support = ["nu-command/which-support", "nu-cmd-lang/which-support"] trash-support = ["nu-command/trash-support", "nu-cmd-lang/trash-support"] -# Dataframe feature for nushell -dataframe = ["dep:nu-cmd-dataframe", "nu-cmd-lang/dataframe"] - # SQLite commands for nushell sqlite = ["nu-command/sqlite", "nu-cmd-lang/sqlite"] diff --git a/crates/nu-cmd-dataframe/Cargo.toml b/crates/nu-cmd-dataframe/Cargo.toml deleted file mode 100644 index a156435a8d..0000000000 --- a/crates/nu-cmd-dataframe/Cargo.toml +++ /dev/null @@ -1,75 +0,0 @@ -[package] -authors = ["The Nushell Project Developers"] -description = "Nushell's dataframe commands based on polars." 
-edition = "2021" -license = "MIT" -name = "nu-cmd-dataframe" -repository = "https://github.com/nushell/nushell/tree/main/crates/nu-cmd-dataframe" -version = "0.93.1" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[lib] -bench = false - -[dependencies] -nu-engine = { path = "../nu-engine", version = "0.93.1" } -nu-parser = { path = "../nu-parser", version = "0.93.1" } -nu-protocol = { path = "../nu-protocol", version = "0.93.1" } - -# Potential dependencies for extras -chrono = { workspace = true, features = ["std", "unstable-locales"], default-features = false } -chrono-tz = { workspace = true } -fancy-regex = { workspace = true } -indexmap = { workspace = true } -num = { version = "0.4", optional = true } -serde = { workspace = true, features = ["derive"] } -# keep sqlparser at 0.39.0 until we can update polars -sqlparser = { version = "0.45", optional = true } -polars-io = { version = "0.39", features = ["avro"], optional = true } -polars-arrow = { version = "0.39", optional = true } -polars-ops = { version = "0.39", optional = true } -polars-plan = { version = "0.39", features = ["regex"], optional = true } -polars-utils = { version = "0.39", optional = true } - -[dependencies.polars] -features = [ - "arg_where", - "checked_arithmetic", - "concat_str", - "cross_join", - "csv", - "cum_agg", - "dtype-categorical", - "dtype-datetime", - "dtype-struct", - "dtype-i8", - "dtype-i16", - "dtype-u8", - "dtype-u16", - "dynamic_group_by", - "ipc", - "is_in", - "json", - "lazy", - "object", - "parquet", - "random", - "rolling_window", - "rows", - "serde", - "serde-lazy", - "strings", - "temporal", - "to_dummies", -] -default-features = false -optional = true -version = "0.39" - -[features] -dataframe = ["num", "polars", "polars-io", "polars-arrow", "polars-ops", "polars-plan", "polars-utils", "sqlparser"] -default = [] - -[dev-dependencies] -nu-cmd-lang = { path = "../nu-cmd-lang", version = "0.93.1" } diff --git a/crates/nu-cmd-dataframe/LICENSE b/crates/nu-cmd-dataframe/LICENSE deleted file mode 100644 index ae174e8595..0000000000 --- a/crates/nu-cmd-dataframe/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2019 - 2023 The Nushell Project Developers - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/crates/nu-cmd-dataframe/src/dataframe/README.md b/crates/nu-cmd-dataframe/src/dataframe/README.md deleted file mode 100644 index 593217ede6..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Dataframe - -This dataframe directory holds all of the definitions of the dataframe data structures and commands. - -There are three sections of commands: - -* [eager](./eager) -* [series](./series) -* [values](./values) - -For more details see the -[Nushell book section on dataframes](https://www.nushell.sh/book/dataframes.html) diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs deleted file mode 100644 index c0be67ed6e..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/append.rs +++ /dev/null @@ -1,134 +0,0 @@ -use crate::dataframe::values::{Axis, Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct AppendDF; - -impl Command for AppendDF { - fn name(&self) -> &str { - "dfr append" - } - - fn usage(&self) -> &str { - "Appends a new dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("other", SyntaxShape::Any, "dataframe to be appended") - .switch("col", "appends in col orientation", Some('c')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Appends a dataframe as new columns", - example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df); - $a | dfr append $a"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "a_x".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b_x".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Appends a dataframe merging at the end of columns", - example: r#"let a = ([[a b]; [1 2] [3 4]] | dfr into-df); - $a | dfr append $a --col"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - Value::test_int(3), - Value::test_int(1), - Value::test_int(3), - ], - ), - Column::new( - "b".to_string(), - vec![ - Value::test_int(2), - Value::test_int(4), - Value::test_int(2), - Value::test_int(4), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let other: Value = call.req(engine_state, stack, 0)?; - - let axis = if call.has_flag(engine_state, stack, "col")? 
{ - Axis::Column - } else { - Axis::Row - }; - let df_other = NuDataFrame::try_from_value(other)?; - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - df.append_df(&df_other, axis, call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(AppendDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs deleted file mode 100644 index be9c33a229..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/cast.rs +++ /dev/null @@ -1,195 +0,0 @@ -use crate::dataframe::values::{str_to_dtype, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::*; - -#[derive(Clone)] -pub struct CastDF; - -impl Command for CastDF { - fn name(&self) -> &str { - "dfr cast" - } - - fn usage(&self) -> &str { - "Cast a column to a different dtype." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .required( - "dtype", - SyntaxShape::String, - "The dtype to cast the column to", - ) - .optional( - "column", - SyntaxShape::String, - "The column to cast. Required when used with a dataframe.", - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Cast a column in a dataframe to a different dtype", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr cast u8 a | dfr schema", - result: Some(Value::record( - record! { - "a" => Value::string("u8", Span::test_data()), - "b" => Value::string("i64", Span::test_data()), - }, - Span::test_data(), - )), - }, - Example { - description: "Cast a column in a lazy dataframe to a different dtype", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr into-lazy | dfr cast u8 a | dfr schema", - result: Some(Value::record( - record! 
{ - "a" => Value::string("u8", Span::test_data()), - "b" => Value::string("i64", Span::test_data()), - }, - Span::test_data(), - )), - }, - Example { - description: "Cast a column in a expression to a different dtype", - example: r#"[[a b]; [1 2] [1 4]] | dfr into-df | dfr group-by a | dfr agg [ (dfr col b | dfr cast u8 | dfr min | dfr as "b_min") ] | dfr schema"#, - result: None - } - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuLazyFrame::can_downcast(&value) { - let (dtype, column_nm) = df_args(engine_state, stack, call)?; - let df = NuLazyFrame::try_from_value(value)?; - command_lazy(call, column_nm, dtype, df) - } else if NuDataFrame::can_downcast(&value) { - let (dtype, column_nm) = df_args(engine_state, stack, call)?; - let df = NuDataFrame::try_from_value(value)?; - command_eager(call, column_nm, dtype, df) - } else { - let dtype: String = call.req(engine_state, stack, 0)?; - let dtype = str_to_dtype(&dtype, call.head)?; - - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().cast(dtype).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } -} - -fn df_args( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result<(DataType, String), ShellError> { - let dtype = dtype_arg(engine_state, stack, call)?; - let column_nm: String = - call.opt(engine_state, stack, 1)? - .ok_or(ShellError::MissingParameter { - param_name: "column_name".into(), - span: call.head, - })?; - Ok((dtype, column_nm)) -} - -fn dtype_arg( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let dtype: String = call.req(engine_state, stack, 0)?; - str_to_dtype(&dtype, call.head) -} - -fn command_lazy( - call: &Call, - column_nm: String, - dtype: DataType, - lazy: NuLazyFrame, -) -> Result { - let column = col(&column_nm).cast(dtype); - let lazy = lazy.into_polars().with_columns(&[column]); - let lazy = NuLazyFrame::new(false, lazy); - - Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head)?, - None, - )) -} - -fn command_eager( - call: &Call, - column_nm: String, - dtype: DataType, - nu_df: NuDataFrame, -) -> Result { - let mut df = nu_df.df; - let column = df - .column(&column_nm) - .map_err(|e| ShellError::GenericError { - error: format!("{e}"), - msg: "".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let casted = column.cast(&dtype).map_err(|e| ShellError::GenericError { - error: format!("{e}"), - msg: "".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let _ = df - .with_column(casted) - .map_err(|e| ShellError::GenericError { - error: format!("{e}"), - msg: "".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let df = NuDataFrame::new(false, df); - Ok(PipelineData::Value(df.into_value(call.head), None)) -} - -#[cfg(test)] -mod test { - - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(CastDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/columns.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/columns.rs deleted file mode 100644 index c9167659b5..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/columns.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - 
-#[derive(Clone)] -pub struct ColumnsDF; - -impl Command for ColumnsDF { - fn name(&self) -> &str { - "dfr columns" - } - - fn usage(&self) -> &str { - "Show dataframe columns." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type(Type::Custom("dataframe".into()), Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Dataframe columns", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr columns", - result: Some(Value::list( - vec![Value::test_string("a"), Value::test_string("b")], - Span::test_data(), - )), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let names: Vec = df - .as_ref() - .get_column_names() - .iter() - .map(|v| Value::string(*v, call.head)) - .collect(); - - let names = Value::list(names, call.head); - - Ok(PipelineData::Value(names, None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ColumnsDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs deleted file mode 100644 index 8f9d086947..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/drop.rs +++ /dev/null @@ -1,115 +0,0 @@ -use crate::dataframe::values::{utils::convert_columns, Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct DropDF; - -impl Command for DropDF { - fn name(&self) -> &str { - "dfr drop" - } - - fn usage(&self) -> &str { - "Creates a new dataframe by dropping the selected columns." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest("rest", SyntaxShape::Any, "column names to be dropped") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "drop column a", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr drop a", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let columns: Vec = call.rest(engine_state, stack, 0)?; - let (col_string, col_span) = convert_columns(columns, call.head)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let new_df = col_string - .first() - .ok_or_else(|| ShellError::GenericError { - error: "Empty names list".into(), - msg: "No column names were found".into(), - span: Some(col_span), - help: None, - inner: vec![], - }) - .and_then(|col| { - df.as_ref() - .drop(&col.item) - .map_err(|e| ShellError::GenericError { - error: "Error dropping column".into(), - msg: e.to_string(), - span: Some(col.span), - help: None, - inner: vec![], - }) - })?; - - // If there are more columns in the drop selection list, these - // are added from the resulting dataframe - col_string - .iter() - .skip(1) - .try_fold(new_df, |new_df, col| { - new_df - .drop(&col.item) - .map_err(|e| ShellError::GenericError { - error: "Error dropping column".into(), - msg: e.to_string(), - span: Some(col.span), - help: None, - inner: vec![], - }) - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(DropDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs deleted file mode 100644 index b2ae6f7cfc..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_duplicates.rs +++ /dev/null @@ -1,119 +0,0 @@ -use crate::dataframe::values::{utils::convert_columns_string, Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::UniqueKeepStrategy; - -#[derive(Clone)] -pub struct DropDuplicates; - -impl Command for DropDuplicates { - fn name(&self) -> &str { - "dfr drop-duplicates" - } - - fn usage(&self) -> &str { - "Drops duplicate values in dataframe." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .optional( - "subset", - SyntaxShape::Table(vec![]), - "subset of columns to drop duplicates", - ) - .switch("maintain", "maintain order", Some('m')) - .switch( - "last", - "keeps last duplicate value (by default keeps first)", - Some('l'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "drop duplicates", - example: "[[a b]; [1 2] [3 4] [1 2]] | dfr into-df | dfr drop-duplicates", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(3), Value::test_int(1)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(4), Value::test_int(2)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let columns: Option> = call.opt(engine_state, stack, 0)?; - let (subset, col_span) = match columns { - Some(cols) => { - let (agg_string, col_span) = convert_columns_string(cols, call.head)?; - (Some(agg_string), col_span) - } - None => (None, call.head), - }; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let subset_slice = subset.as_ref().map(|cols| &cols[..]); - - let keep_strategy = if call.has_flag(engine_state, stack, "last")? { - UniqueKeepStrategy::Last - } else { - UniqueKeepStrategy::First - }; - - df.as_ref() - .unique(subset_slice, keep_strategy, None) - .map_err(|e| ShellError::GenericError { - error: "Error dropping duplicates".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(DropDuplicates {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs deleted file mode 100644 index 25a3907426..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/drop_nulls.rs +++ /dev/null @@ -1,137 +0,0 @@ -use crate::dataframe::values::{utils::convert_columns_string, Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct DropNulls; - -impl Command for DropNulls { - fn name(&self) -> &str { - "dfr drop-nulls" - } - - fn usage(&self) -> &str { - "Drops null values in dataframe." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .optional( - "subset", - SyntaxShape::Table(vec![]), - "subset of columns to drop nulls", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "drop null values in dataframe", - example: r#"let df = ([[a b]; [1 2] [3 0] [1 2]] | dfr into-df); - let res = ($df.b / $df.b); - let a = ($df | dfr with-column $res --name res); - $a | dfr drop-nulls"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(1)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - ), - Column::new( - "res".to_string(), - vec![Value::test_int(1), Value::test_int(1)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "drop null values in dataframe", - example: r#"let s = ([1 2 0 0 3 4] | dfr into-df); - ($s / $s) | dfr drop-nulls"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "div_0_0".to_string(), - vec![ - Value::test_int(1), - Value::test_int(1), - Value::test_int(1), - Value::test_int(1), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let columns: Option> = call.opt(engine_state, stack, 0)?; - - let (subset, col_span) = match columns { - Some(cols) => { - let (agg_string, col_span) = convert_columns_string(cols, call.head)?; - (Some(agg_string), col_span) - } - None => (None, call.head), - }; - - let subset_slice = subset.as_ref().map(|cols| &cols[..]); - - df.as_ref() - .drop_nulls(subset_slice) - .map_err(|e| ShellError::GenericError { - error: "Error dropping nulls".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::super::WithColumn; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(DropNulls {}), Box::new(WithColumn {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs deleted file mode 100644 index a572a49551..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/dtypes.rs +++ /dev/null @@ -1,104 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct DataTypes; - -impl Command for DataTypes { - fn name(&self) -> &str { - "dfr dtypes" - } - - fn usage(&self) -> &str { - "Show dataframe data types." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Dataframe dtypes", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dtypes", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "column".to_string(), - vec![Value::test_string("a"), Value::test_string("b")], - ), - Column::new( - "dtype".to_string(), - vec![Value::test_string("i64"), Value::test_string("i64")], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut dtypes: Vec = Vec::new(); - let names: Vec = df - .as_ref() - .get_column_names() - .iter() - .map(|v| { - let dtype = df - .as_ref() - .column(v) - .expect("using name from list of names from dataframe") - .dtype(); - - let dtype_str = dtype.to_string(); - - dtypes.push(Value::string(dtype_str, call.head)); - - Value::string(*v, call.head) - }) - .collect(); - - let names_col = Column::new("column".to_string(), names); - let dtypes_col = Column::new("dtype".to_string(), dtypes); - - NuDataFrame::try_from_columns(vec![names_col, dtypes_col], None) - .map(|df| PipelineData::Value(df.into_value(call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(DataTypes {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/dummies.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/dummies.rs deleted file mode 100644 index f47f65a004..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/dummies.rs +++ /dev/null @@ -1,107 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::{prelude::*, series::Series}; - -#[derive(Clone)] -pub struct Dummies; - -impl Command for Dummies { - fn name(&self) -> &str { - "dfr dummies" - } - - fn usage(&self) -> &str { - "Creates a new dataframe with dummy variables." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .switch("drop-first", "Drop first row", Some('d')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create new dataframe with dummy variables from a dataframe", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr dummies", - result: Some( - NuDataFrame::try_from_series( - vec![ - Series::new("a_1", &[1_u8, 0]), - Series::new("a_3", &[0_u8, 1]), - Series::new("b_2", &[1_u8, 0]), - Series::new("b_4", &[0_u8, 1]), - ], - Span::test_data(), - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Create new dataframe with dummy variables from a series", - example: "[1 2 2 3 3] | dfr into-df | dfr dummies", - result: Some( - NuDataFrame::try_from_series( - vec![ - Series::new("0_1", &[1_u8, 0, 0, 0, 0]), - Series::new("0_2", &[0_u8, 1, 1, 0, 0]), - Series::new("0_3", &[0_u8, 0, 0, 1, 1]), - ], - Span::test_data(), - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let drop_first: bool = call.has_flag(engine_state, stack, "drop-first")?; - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - df.as_ref() - .to_dummies(None, drop_first) - .map_err(|e| ShellError::GenericError { - error: "Error calculating dummies".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The only allowed column types for dummies are String or Int".into()), - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Dummies {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs deleted file mode 100644 index e0e94d10a0..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/filter_with.rs +++ /dev/null @@ -1,154 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::LazyFrame; - -#[derive(Clone)] -pub struct FilterWith; - -impl Command for FilterWith { - fn name(&self) -> &str { - "dfr filter-with" - } - - fn usage(&self) -> &str { - "Filters dataframe using a mask or expression as reference." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "mask or expression", - SyntaxShape::Any, - "boolean mask used to filter data", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Filter dataframe using a bool mask", - example: r#"let mask = ([true false] | dfr into-df); - [[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with $mask"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Filter dataframe using an expression", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr filter-with ((dfr col a) > 1)", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - Column::new("b".to_string(), vec![Value::test_int(4)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuLazyFrame::can_downcast(&value) { - let df = NuLazyFrame::try_from_value(value)?; - command_lazy(engine_state, stack, call, df) - } else { - let df = NuDataFrame::try_from_value(value)?; - command_eager(engine_state, stack, call, df) - } - } -} - -fn command_eager( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let mask_value: Value = call.req(engine_state, stack, 0)?; - let mask_span = mask_value.span(); - - if NuExpression::can_downcast(&mask_value) { - let expression = NuExpression::try_from_value(mask_value)?; - let lazy = NuLazyFrame::new(true, df.lazy()); - let lazy = lazy.apply_with_expr(expression, LazyFrame::filter); - - Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head)?, - None, - )) - } else { - let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?; - let mask = mask.bool().map_err(|e| ShellError::GenericError { - error: "Error casting to bool".into(), - msg: e.to_string(), - span: Some(mask_span), - help: Some("Perhaps you want to use a series with booleans as mask".into()), - inner: vec![], - })?; - - df.as_ref() - .filter(mask) - .map_err(|e| ShellError::GenericError { - error: "Error filtering dataframe".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The only allowed column types for dummies are String or Int".into()), - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) - } -} - -fn command_lazy( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - lazy: NuLazyFrame, -) -> Result { - let expr: Value = call.req(engine_state, stack, 0)?; - let expr = NuExpression::try_from_value(expr)?; - - let lazy = lazy.apply_with_expr(expr, LazyFrame::filter); - - Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head)?, - None, - )) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::expressions::ExprCol; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(FilterWith 
{}), Box::new(ExprCol {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs deleted file mode 100644 index 14c86e8c40..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/first.rs +++ /dev/null @@ -1,144 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct FirstDF; - -impl Command for FirstDF { - fn name(&self) -> &str { - "dfr first" - } - - fn usage(&self) -> &str { - "Show only the first number of rows or create a first expression" - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .optional( - "rows", - SyntaxShape::Int, - "starting from the front, the number of rows to return", - ) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Return the first row of a dataframe", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ], - None, - ) - .expect("should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Return the first two rows of a dataframe", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr first 2", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ], - None, - ) - .expect("should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates a first expression from a column", - example: "dfr col a | dfr first", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - command(engine_state, stack, call, df) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().first().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let rows: Option = call.opt(engine_state, stack, 0)?; - let rows = rows.unwrap_or(1); - - let res = df.as_ref().head(Some(rows)); - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, call.head), - None, - )) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(FirstDF {})]); - test_dataframe_example(&mut engine_state, &FirstDF.examples()[0]); - test_dataframe_example(&mut engine_state, &FirstDF.examples()[1]); - } - - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - 
Box::new(FirstDF {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &FirstDF.examples()[2]); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs deleted file mode 100644 index e8cf337864..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/get.rs +++ /dev/null @@ -1,87 +0,0 @@ -use crate::dataframe::values::{utils::convert_columns_string, Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct GetDF; - -impl Command for GetDF { - fn name(&self) -> &str { - "dfr get" - } - - fn usage(&self) -> &str { - "Creates dataframe with the selected columns." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest("rest", SyntaxShape::Any, "column names to sort dataframe") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns the selected column", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr get a", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let columns: Vec = call.rest(engine_state, stack, 0)?; - let (col_string, col_span) = convert_columns_string(columns, call.head)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - df.as_ref() - .select(col_string) - .map_err(|e| ShellError::GenericError { - error: "Error selecting columns".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs deleted file mode 100644 index ff2c4f98a2..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/last.rs +++ /dev/null @@ -1,118 +0,0 @@ -use crate::dataframe::values::{utils::DEFAULT_ROWS, Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LastDF; - -impl Command for LastDF { - fn name(&self) -> &str { - "dfr last" - } - - fn usage(&self) -> &str { - "Creates new dataframe with tail rows or creates a last expression." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .optional("rows", SyntaxShape::Int, "Number of rows for tail") - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create new dataframe with last rows", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr last 1", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(3)]), - Column::new("b".to_string(), vec![Value::test_int(4)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates a last expression from a column", - example: "dfr col a | dfr last", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - command(engine_state, stack, call, df) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().last().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let rows: Option = call.opt(engine_state, stack, 0)?; - let rows = rows.unwrap_or(DEFAULT_ROWS); - - let res = df.as_ref().tail(Some(rows)); - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, call.head), - None, - )) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(LastDF {})]); - test_dataframe_example(&mut engine_state, &LastDF.examples()[0]); - } - - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - Box::new(LastDF {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &LastDF.examples()[1]); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/list.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/list.rs deleted file mode 100644 index 1cee694180..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/list.rs +++ /dev/null @@ -1,68 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ListDF; - -impl Command for ListDF { - fn name(&self) -> &str { - "dfr ls" - } - - fn usage(&self) -> &str { - "Lists stored dataframes." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()).category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Creates a new dataframe and shows it in the dataframe list", - example: r#"let test = ([[a b];[1 2] [3 4]] | dfr into-df); - ls"#, - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - let mut vals: Vec<(String, Value)> = vec![]; - - for overlay_frame in engine_state.active_overlays(&[]) { - for var in &overlay_frame.vars { - if let Ok(value) = stack.get_var(*var.1, call.head) { - let name = String::from_utf8_lossy(var.0).to_string(); - vals.push((name, value)); - } - } - } - - let vals = vals - .into_iter() - .filter_map(|(name, value)| { - NuDataFrame::try_from_value(value).ok().map(|df| (name, df)) - }) - .map(|(name, df)| { - Value::record( - record! { - "name" => Value::string(name, call.head), - "columns" => Value::int(df.as_ref().width() as i64, call.head), - "rows" => Value::int(df.as_ref().height() as i64, call.head), - }, - call.head, - ) - }) - .collect::>(); - - let list = Value::list(vals, call.head); - - Ok(list.into_pipeline_data()) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs deleted file mode 100644 index 6379e9270e..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/melt.rs +++ /dev/null @@ -1,248 +0,0 @@ -use crate::dataframe::values::{utils::convert_columns_string, Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct MeltDF; - -impl Command for MeltDF { - fn name(&self) -> &str { - "dfr melt" - } - - fn usage(&self) -> &str { - "Unpivot a DataFrame from wide to long format." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required_named( - "columns", - SyntaxShape::Table(vec![]), - "column names for melting", - Some('c'), - ) - .required_named( - "values", - SyntaxShape::Table(vec![]), - "column names used as value columns", - Some('v'), - ) - .named( - "variable-name", - SyntaxShape::String, - "optional name for variable column", - Some('r'), - ) - .named( - "value-name", - SyntaxShape::String, - "optional name for value column", - Some('l'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "melt dataframe", - example: - "[[a b c d]; [x 1 4 a] [y 2 5 b] [z 3 6 c]] | dfr into-df | dfr melt -c [b c] -v [a d]", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "b".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - ], - ), - Column::new( - "c".to_string(), - vec![ - Value::test_int(4), - Value::test_int(5), - Value::test_int(6), - Value::test_int(4), - Value::test_int(5), - Value::test_int(6), - ], - ), - Column::new( - "variable".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("a"), - Value::test_string("a"), - Value::test_string("d"), - Value::test_string("d"), - Value::test_string("d"), - ], - ), - Column::new( - "value".to_string(), - vec![ - Value::test_string("x"), - Value::test_string("y"), - Value::test_string("z"), - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - ], - ), - ], None) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let id_col: Vec = call - .get_flag(engine_state, stack, "columns")? - .expect("required value"); - let val_col: Vec = call - .get_flag(engine_state, stack, "values")? 
- .expect("required value"); - - let value_name: Option> = call.get_flag(engine_state, stack, "value-name")?; - let variable_name: Option> = - call.get_flag(engine_state, stack, "variable-name")?; - - let (id_col_string, id_col_span) = convert_columns_string(id_col, call.head)?; - let (val_col_string, val_col_span) = convert_columns_string(val_col, call.head)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - check_column_datatypes(df.as_ref(), &id_col_string, id_col_span)?; - check_column_datatypes(df.as_ref(), &val_col_string, val_col_span)?; - - let mut res = df - .as_ref() - .melt(&id_col_string, &val_col_string) - .map_err(|e| ShellError::GenericError { - error: "Error calculating melt".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - if let Some(name) = &variable_name { - res.rename("variable", &name.item) - .map_err(|e| ShellError::GenericError { - error: "Error renaming column".into(), - msg: e.to_string(), - span: Some(name.span), - help: None, - inner: vec![], - })?; - } - - if let Some(name) = &value_name { - res.rename("value", &name.item) - .map_err(|e| ShellError::GenericError { - error: "Error renaming column".into(), - msg: e.to_string(), - span: Some(name.span), - help: None, - inner: vec![], - })?; - } - - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, call.head), - None, - )) -} - -fn check_column_datatypes>( - df: &polars::prelude::DataFrame, - cols: &[T], - col_span: Span, -) -> Result<(), ShellError> { - if cols.is_empty() { - return Err(ShellError::GenericError { - error: "Merge error".into(), - msg: "empty column list".into(), - span: Some(col_span), - help: None, - inner: vec![], - }); - } - - // Checking if they are same type - if cols.len() > 1 { - for w in cols.windows(2) { - let l_series = df - .column(w[0].as_ref()) - .map_err(|e| ShellError::GenericError { - error: "Error selecting columns".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; - - let r_series = df - .column(w[1].as_ref()) - .map_err(|e| ShellError::GenericError { - error: "Error selecting columns".into(), - msg: e.to_string(), - span: Some(col_span), - help: None, - inner: vec![], - })?; - - if l_series.dtype() != r_series.dtype() { - return Err(ShellError::GenericError { - error: "Merge error".into(), - msg: "found different column types in list".into(), - span: Some(col_span), - help: Some(format!( - "datatypes {} and {} are incompatible", - l_series.dtype(), - r_series.dtype() - )), - inner: vec![], - }); - } - } - } - - Ok(()) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(MeltDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs deleted file mode 100644 index db7a5c9312..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/mod.rs +++ /dev/null @@ -1,114 +0,0 @@ -mod append; -mod cast; -mod columns; -mod drop; -mod drop_duplicates; -mod drop_nulls; -mod dtypes; -mod dummies; -mod filter_with; -mod first; -mod get; -mod last; -mod list; -mod melt; -mod open; -mod query_df; -mod rename; -mod sample; -mod schema; -mod shape; -mod slice; -mod sql_context; -mod sql_expr; -mod summary; -mod take; -mod to_arrow; -mod to_avro; -mod to_csv; -mod to_df; -mod to_json_lines; -mod to_nu; -mod to_parquet; -mod with_column; - -use nu_protocol::engine::StateWorkingSet; - 
-pub use self::open::OpenDataFrame; -pub use append::AppendDF; -pub use cast::CastDF; -pub use columns::ColumnsDF; -pub use drop::DropDF; -pub use drop_duplicates::DropDuplicates; -pub use drop_nulls::DropNulls; -pub use dtypes::DataTypes; -pub use dummies::Dummies; -pub use filter_with::FilterWith; -pub use first::FirstDF; -pub use get::GetDF; -pub use last::LastDF; -pub use list::ListDF; -pub use melt::MeltDF; -pub use query_df::QueryDf; -pub use rename::RenameDF; -pub use sample::SampleDF; -pub use schema::SchemaDF; -pub use shape::ShapeDF; -pub use slice::SliceDF; -pub use sql_context::SQLContext; -pub use summary::Summary; -pub use take::TakeDF; -pub use to_arrow::ToArrow; -pub use to_avro::ToAvro; -pub use to_csv::ToCSV; -pub use to_df::ToDataFrame; -pub use to_json_lines::ToJsonLines; -pub use to_nu::ToNu; -pub use to_parquet::ToParquet; -pub use with_column::WithColumn; - -pub fn add_eager_decls(working_set: &mut StateWorkingSet) { - macro_rules! bind_command { - ( $command:expr ) => { - working_set.add_decl(Box::new($command)); - }; - ( $( $command:expr ),* ) => { - $( working_set.add_decl(Box::new($command)); )* - }; - } - - // Dataframe commands - bind_command!( - AppendDF, - CastDF, - ColumnsDF, - DataTypes, - Summary, - DropDF, - DropDuplicates, - DropNulls, - Dummies, - FilterWith, - FirstDF, - GetDF, - LastDF, - ListDF, - MeltDF, - OpenDataFrame, - QueryDf, - RenameDF, - SampleDF, - SchemaDF, - ShapeDF, - SliceDF, - TakeDF, - ToArrow, - ToAvro, - ToCSV, - ToDataFrame, - ToNu, - ToParquet, - ToJsonLines, - WithColumn - ); -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs deleted file mode 100644 index 38d0d0c49f..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/open.rs +++ /dev/null @@ -1,518 +0,0 @@ -use crate::dataframe::values::{NuDataFrame, NuLazyFrame, NuSchema}; -use nu_engine::command_prelude::*; - -use polars::prelude::{ - CsvEncoding, CsvReader, IpcReader, JsonFormat, JsonReader, LazyCsvReader, LazyFileListReader, - LazyFrame, ParallelStrategy, ParquetReader, ScanArgsIpc, ScanArgsParquet, SerReader, -}; -use polars_io::{avro::AvroReader, HiveOptions}; -use std::{fs::File, io::BufReader, path::PathBuf}; - -#[derive(Clone)] -pub struct OpenDataFrame; - -impl Command for OpenDataFrame { - fn name(&self) -> &str { - "dfr open" - } - - fn usage(&self) -> &str { - "Opens CSV, JSON, JSON lines, arrow, avro, or parquet file to create dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "file", - SyntaxShape::Filepath, - "file path to load values from", - ) - .switch("lazy", "creates a lazy dataframe", Some('l')) - .named( - "type", - SyntaxShape::String, - "File type: csv, tsv, json, parquet, arrow, avro. If omitted, derive from file extension", - Some('t'), - ) - .named( - "delimiter", - SyntaxShape::String, - "file delimiter character. CSV file", - Some('d'), - ) - .switch( - "no-header", - "Indicates if file doesn't have header. CSV file", - None, - ) - .named( - "infer-schema", - SyntaxShape::Number, - "Number of rows to infer the schema of the file. CSV file", - None, - ) - .named( - "skip-rows", - SyntaxShape::Number, - "Number of rows to skip from file. CSV file", - None, - ) - .named( - "columns", - SyntaxShape::List(Box::new(SyntaxShape::String)), - "Columns to be selected from csv file. CSV and Parquet file", - None, - ) - .named( - "schema", - SyntaxShape::Record(vec![]), - r#"Polars Schema in format [{name: str}]. 
CSV, JSON, and JSONL files"#, - Some('s') - ) - .input_output_type(Type::Any, Type::Custom("dataframe".into())) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Takes a file name and creates a dataframe", - example: "dfr open test.csv", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - command(engine_state, stack, call) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let file: Spanned = call.req(engine_state, stack, 0)?; - - let type_option: Option> = call.get_flag(engine_state, stack, "type")?; - - let type_id = match &type_option { - Some(ref t) => Some((t.item.to_owned(), "Invalid type", t.span)), - None => file.item.extension().map(|e| { - ( - e.to_string_lossy().into_owned(), - "Invalid extension", - file.span, - ) - }), - }; - - match type_id { - Some((e, msg, blamed)) => match e.as_str() { - "csv" | "tsv" => from_csv(engine_state, stack, call), - "parquet" | "parq" => from_parquet(engine_state, stack, call), - "ipc" | "arrow" => from_ipc(engine_state, stack, call), - "json" => from_json(engine_state, stack, call), - "jsonl" => from_jsonl(engine_state, stack, call), - "avro" => from_avro(engine_state, stack, call), - _ => Err(ShellError::FileNotFoundCustom { - msg: format!( - "{msg}. Supported values: csv, tsv, parquet, ipc, arrow, json, jsonl, avro" - ), - span: blamed, - }), - }, - None => Err(ShellError::FileNotFoundCustom { - msg: "File without extension".into(), - span: file.span, - }), - } - .map(|value| PipelineData::Value(value, None)) -} - -fn from_parquet( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - if call.has_flag(engine_state, stack, "lazy")? { - let file: String = call.req(engine_state, stack, 0)?; - let args = ScanArgsParquet { - n_rows: None, - cache: true, - parallel: ParallelStrategy::Auto, - rechunk: false, - row_index: None, - low_memory: false, - cloud_options: None, - use_statistics: false, - hive_options: HiveOptions::default(), - }; - - let df: NuLazyFrame = LazyFrame::scan_parquet(file, args) - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - df.into_value(call.head) - } else { - let file: Spanned = call.req(engine_state, stack, 0)?; - let columns: Option> = call.get_flag(engine_state, stack, "columns")?; - - let r = File::open(&file.item).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file.span), - help: None, - inner: vec![], - })?; - let reader = ParquetReader::new(r); - - let reader = match columns { - None => reader, - Some(columns) => reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? 
- .into(); - - Ok(df.into_value(call.head)) - } -} - -fn from_avro( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let file: Spanned = call.req(engine_state, stack, 0)?; - let columns: Option> = call.get_flag(engine_state, stack, "columns")?; - - let r = File::open(&file.item).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file.span), - help: None, - inner: vec![], - })?; - let reader = AvroReader::new(r); - - let reader = match columns { - None => reader, - Some(columns) => reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Avro reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - Ok(df.into_value(call.head)) -} - -fn from_ipc( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - if call.has_flag(engine_state, stack, "lazy")? { - let file: String = call.req(engine_state, stack, 0)?; - let args = ScanArgsIpc { - n_rows: None, - cache: true, - rechunk: false, - row_index: None, - memory_map: true, - cloud_options: None, - }; - - let df: NuLazyFrame = LazyFrame::scan_ipc(file, args) - .map_err(|e| ShellError::GenericError { - error: "IPC reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - df.into_value(call.head) - } else { - let file: Spanned = call.req(engine_state, stack, 0)?; - let columns: Option> = call.get_flag(engine_state, stack, "columns")?; - - let r = File::open(&file.item).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file.span), - help: None, - inner: vec![], - })?; - let reader = IpcReader::new(r); - - let reader = match columns { - None => reader, - Some(columns) => reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "IPC reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - Ok(df.into_value(call.head)) - } -} - -fn from_json( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let file: Spanned = call.req(engine_state, stack, 0)?; - let file = File::open(&file.item).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file.span), - help: None, - inner: vec![], - })?; - let maybe_schema = call - .get_flag(engine_state, stack, "schema")? - .map(|schema| NuSchema::try_from(&schema)) - .transpose()?; - - let buf_reader = BufReader::new(file); - let reader = JsonReader::new(buf_reader); - - let reader = match maybe_schema { - Some(schema) => reader.with_schema(schema.into()), - None => reader, - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Json reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - Ok(df.into_value(call.head)) -} - -fn from_jsonl( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let infer_schema: Option = call.get_flag(engine_state, stack, "infer-schema")?; - let maybe_schema = call - .get_flag(engine_state, stack, "schema")? 
- .map(|schema| NuSchema::try_from(&schema)) - .transpose()?; - let file: Spanned = call.req(engine_state, stack, 0)?; - let file = File::open(&file.item).map_err(|e| ShellError::GenericError { - error: "Error opening file".into(), - msg: e.to_string(), - span: Some(file.span), - help: None, - inner: vec![], - })?; - - let buf_reader = BufReader::new(file); - let reader = JsonReader::new(buf_reader) - .with_json_format(JsonFormat::JsonLines) - .infer_schema_len(infer_schema); - - let reader = match maybe_schema { - Some(schema) => reader.with_schema(schema.into()), - None => reader, - }; - - let df: NuDataFrame = reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Json lines reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - Ok(df.into_value(call.head)) -} - -fn from_csv( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, -) -> Result { - let delimiter: Option> = call.get_flag(engine_state, stack, "delimiter")?; - let no_header: bool = call.has_flag(engine_state, stack, "no-header")?; - let infer_schema: Option = call.get_flag(engine_state, stack, "infer-schema")?; - let skip_rows: Option = call.get_flag(engine_state, stack, "skip-rows")?; - let columns: Option> = call.get_flag(engine_state, stack, "columns")?; - - let maybe_schema = call - .get_flag(engine_state, stack, "schema")? - .map(|schema| NuSchema::try_from(&schema)) - .transpose()?; - - if call.has_flag(engine_state, stack, "lazy")? { - let file: String = call.req(engine_state, stack, 0)?; - let csv_reader = LazyCsvReader::new(file); - - let csv_reader = match delimiter { - None => csv_reader, - Some(d) => { - if d.item.len() != 1 { - return Err(ShellError::GenericError { - error: "Incorrect delimiter".into(), - msg: "Delimiter has to be one character".into(), - span: Some(d.span), - help: None, - inner: vec![], - }); - } else { - let delimiter = match d.item.chars().next() { - Some(d) => d as u8, - None => unreachable!(), - }; - csv_reader.with_separator(delimiter) - } - } - }; - - let csv_reader = csv_reader.has_header(!no_header); - - let csv_reader = match maybe_schema { - Some(schema) => csv_reader.with_schema(Some(schema.into())), - None => csv_reader, - }; - - let csv_reader = match infer_schema { - None => csv_reader, - Some(r) => csv_reader.with_infer_schema_length(Some(r)), - }; - - let csv_reader = match skip_rows { - None => csv_reader, - Some(r) => csv_reader.with_skip_rows(r), - }; - - let df: NuLazyFrame = csv_reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - df.into_value(call.head) - } else { - let file: Spanned = call.req(engine_state, stack, 0)?; - let csv_reader = CsvReader::from_path(&file.item) - .map_err(|e| ShellError::GenericError { - error: "Error creating CSV reader".into(), - msg: e.to_string(), - span: Some(file.span), - help: None, - inner: vec![], - })? 
- .with_encoding(CsvEncoding::LossyUtf8); - - let csv_reader = match delimiter { - None => csv_reader, - Some(d) => { - if d.item.len() != 1 { - return Err(ShellError::GenericError { - error: "Incorrect delimiter".into(), - msg: "Delimiter has to be one character".into(), - span: Some(d.span), - help: None, - inner: vec![], - }); - } else { - let delimiter = match d.item.chars().next() { - Some(d) => d as u8, - None => unreachable!(), - }; - csv_reader.with_separator(delimiter) - } - } - }; - - let csv_reader = csv_reader.has_header(!no_header); - - let csv_reader = match maybe_schema { - Some(schema) => csv_reader.with_schema(Some(schema.into())), - None => csv_reader, - }; - - let csv_reader = match infer_schema { - None => csv_reader, - Some(r) => csv_reader.infer_schema(Some(r)), - }; - - let csv_reader = match skip_rows { - None => csv_reader, - Some(r) => csv_reader.with_skip_rows(r), - }; - - let csv_reader = match columns { - None => csv_reader, - Some(columns) => csv_reader.with_columns(Some(columns)), - }; - - let df: NuDataFrame = csv_reader - .finish() - .map_err(|e| ShellError::GenericError { - error: "Parquet reader error".into(), - msg: format!("{e:?}"), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - Ok(df.into_value(call.head)) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs deleted file mode 100644 index 4088e00afa..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/query_df.rs +++ /dev/null @@ -1,104 +0,0 @@ -use crate::dataframe::{ - eager::SQLContext, - values::{Column, NuDataFrame, NuLazyFrame}, -}; -use nu_engine::command_prelude::*; - -// attribution: -// sql_context.rs, and sql_expr.rs were copied from polars-sql. thank you. -// maybe we should just use the crate at some point but it's not published yet. -// https://github.com/pola-rs/polars/tree/master/polars-sql - -#[derive(Clone)] -pub struct QueryDf; - -impl Command for QueryDf { - fn name(&self) -> &str { - "dfr query" - } - - fn usage(&self) -> &str { - "Query dataframe using SQL. Note: The dataframe is always named 'df' in your query's from clause." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("sql", SyntaxShape::String, "sql query") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn search_terms(&self) -> Vec<&str> { - vec!["dataframe", "sql", "search"] - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Query dataframe using SQL", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr query 'select a from df'", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let sql_query: String = call.req(engine_state, stack, 0)?; - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut ctx = SQLContext::new(); - ctx.register("df", &df.df); - let df_sql = ctx - .execute(&sql_query) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - let lazy = NuLazyFrame::new(false, df_sql); - - let eager = lazy.collect(call.head)?; - let value = Value::custom(Box::new(eager), call.head); - - Ok(PipelineData::Value(value, None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(QueryDf {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs deleted file mode 100644 index 0cb75f34f2..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/rename.rs +++ /dev/null @@ -1,185 +0,0 @@ -use crate::dataframe::{ - utils::extract_strings, - values::{Column, NuDataFrame, NuLazyFrame}, -}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct RenameDF; - -impl Command for RenameDF { - fn name(&self) -> &str { - "dfr rename" - } - - fn usage(&self) -> &str { - "Rename a dataframe column." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "columns", - SyntaxShape::Any, - "Column(s) to be renamed. A string or list of strings", - ) - .required( - "new names", - SyntaxShape::Any, - "New names for the selected column(s). 
A string or list of strings", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Renames a series", - example: "[5 6 7 8] | dfr into-df | dfr rename '0' new_name", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "new_name".to_string(), - vec![ - Value::test_int(5), - Value::test_int(6), - Value::test_int(7), - Value::test_int(8), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Renames a dataframe column", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename a a_new", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a_new".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Renames two dataframe columns", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr rename [a b] [a_new b_new]", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a_new".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b_new".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuLazyFrame::can_downcast(&value) { - let df = NuLazyFrame::try_from_value(value)?; - command_lazy(engine_state, stack, call, df) - } else { - let df = NuDataFrame::try_from_value(value)?; - command_eager(engine_state, stack, call, df) - } - } -} - -fn command_eager( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - mut df: NuDataFrame, -) -> Result { - let columns: Value = call.req(engine_state, stack, 0)?; - let columns = extract_strings(columns)?; - - let new_names: Value = call.req(engine_state, stack, 1)?; - let new_names = extract_strings(new_names)?; - - for (from, to) in columns.iter().zip(new_names.iter()) { - df.as_mut() - .rename(from, to) - .map_err(|e| ShellError::GenericError { - error: "Error renaming".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - } - - Ok(PipelineData::Value(df.into_value(call.head), None)) -} - -fn command_lazy( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - lazy: NuLazyFrame, -) -> Result { - let columns: Value = call.req(engine_state, stack, 0)?; - let columns = extract_strings(columns)?; - - let new_names: Value = call.req(engine_state, stack, 1)?; - let new_names = extract_strings(new_names)?; - - if columns.len() != new_names.len() { - let value: Value = call.req(engine_state, stack, 1)?; - return Err(ShellError::IncompatibleParametersSingle { - msg: "New name list has different size to column list".into(), - span: value.span(), - }); - } - - let lazy = lazy.into_polars(); - let lazy: NuLazyFrame = lazy.rename(&columns, &new_names).into(); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use 
super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(RenameDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs deleted file mode 100644 index 2387cca489..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/sample.rs +++ /dev/null @@ -1,127 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::{prelude::NamedFrom, series::Series}; - -#[derive(Clone)] -pub struct SampleDF; - -impl Command for SampleDF { - fn name(&self) -> &str { - "dfr sample" - } - - fn usage(&self) -> &str { - "Create sample dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "n-rows", - SyntaxShape::Int, - "number of rows to be taken from dataframe", - Some('n'), - ) - .named( - "fraction", - SyntaxShape::Number, - "fraction of dataframe to be taken", - Some('f'), - ) - .named( - "seed", - SyntaxShape::Number, - "seed for the selection", - Some('s'), - ) - .switch("replace", "sample with replace", Some('e')) - .switch("shuffle", "shuffle sample", Some('u')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Sample rows from dataframe", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr sample --n-rows 1", - result: None, // No expected value because sampling is random - }, - Example { - description: "Shows sample row using fraction and replace", - example: - "[[a b]; [1 2] [3 4] [5 6]] | dfr into-df | dfr sample --fraction 0.5 --replace", - result: None, // No expected value because sampling is random - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let rows: Option> = call.get_flag(engine_state, stack, "n-rows")?; - let fraction: Option> = call.get_flag(engine_state, stack, "fraction")?; - let seed: Option = call - .get_flag::(engine_state, stack, "seed")? 
- .map(|val| val as u64); - let replace: bool = call.has_flag(engine_state, stack, "replace")?; - let shuffle: bool = call.has_flag(engine_state, stack, "shuffle")?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - match (rows, fraction) { - (Some(rows), None) => df - .as_ref() - .sample_n(&Series::new("s", &[rows.item]), replace, shuffle, seed) - .map_err(|e| ShellError::GenericError { - error: "Error creating sample".into(), - msg: e.to_string(), - span: Some(rows.span), - help: None, - inner: vec![], - }), - (None, Some(frac)) => df - .as_ref() - .sample_frac(&Series::new("frac", &[frac.item]), replace, shuffle, seed) - .map_err(|e| ShellError::GenericError { - error: "Error creating sample".into(), - msg: e.to_string(), - span: Some(frac.span), - help: None, - inner: vec![], - }), - (Some(_), Some(_)) => Err(ShellError::GenericError { - error: "Incompatible flags".into(), - msg: "Only one selection criterion allowed".into(), - span: Some(call.head), - help: None, - inner: vec![], - }), - (None, None) => Err(ShellError::GenericError { - error: "No selection".into(), - msg: "No selection criterion was found".into(), - span: Some(call.head), - help: Some("Perhaps you want to use the flag -n or -f".into()), - inner: vec![], - }), - } - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs deleted file mode 100644 index cf887482bd..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/schema.rs +++ /dev/null @@ -1,112 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct SchemaDF; - -impl Command for SchemaDF { - fn name(&self) -> &str { - "dfr schema" - } - - fn usage(&self) -> &str { - "Show schema for a dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .switch("datatype-list", "creates a lazy dataframe", Some('l')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Dataframe schema", - example: r#"[[a b]; [1 "foo"] [3 "bar"]] | dfr into-df | dfr schema"#, - result: Some(Value::record( - record! { - "a" => Value::string("i64", Span::test_data()), - "b" => Value::string("str", Span::test_data()), - }, - Span::test_data(), - )), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - if call.has_flag(engine_state, stack, "datatype-list")? { - Ok(PipelineData::Value(datatype_list(Span::unknown()), None)) - } else { - command(engine_state, stack, call, input) - } - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let schema = df.schema(); - let value: Value = schema.into(); - Ok(PipelineData::Value(value, None)) -} - -fn datatype_list(span: Span) -> Value { - let types: Vec = [ - ("null", ""), - ("bool", ""), - ("u8", ""), - ("u16", ""), - ("u32", ""), - ("u64", ""), - ("i8", ""), - ("i16", ""), - ("i32", ""), - ("i64", ""), - ("f32", ""), - ("f64", ""), - ("str", ""), - ("binary", ""), - ("date", ""), - ("datetime", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns. Timezone wildcard is *. 
Other Timezone examples: UTC, America/Los_Angeles."), - ("duration", "Time Unit can be: milliseconds: ms, microseconds: us, nanoseconds: ns."), - ("time", ""), - ("object", ""), - ("unknown", ""), - ("list", ""), - ] - .iter() - .map(|(dtype, note)| { - Value::record(record! { - "dtype" => Value::string(*dtype, span), - "note" => Value::string(*note, span), - }, - span) - }) - .collect(); - Value::list(types, span) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(SchemaDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs deleted file mode 100644 index 6e5e7fa9d3..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/shape.rs +++ /dev/null @@ -1,82 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ShapeDF; - -impl Command for ShapeDF { - fn name(&self) -> &str { - "dfr shape" - } - - fn usage(&self) -> &str { - "Shows column and row size for a dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Shows row and column shape", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr shape", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("rows".to_string(), vec![Value::test_int(2)]), - Column::new("columns".to_string(), vec![Value::test_int(2)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let rows = Value::int(df.as_ref().height() as i64, call.head); - - let cols = Value::int(df.as_ref().width() as i64, call.head); - - let rows_col = Column::new("rows".to_string(), vec![rows]); - let cols_col = Column::new("columns".to_string(), vec![cols]); - - NuDataFrame::try_from_columns(vec![rows_col, cols_col], None) - .map(|df| PipelineData::Value(df.into_value(call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ShapeDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs deleted file mode 100644 index 48906cba2c..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/slice.rs +++ /dev/null @@ -1,84 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct SliceDF; - -impl Command for SliceDF { - fn name(&self) -> &str { - "dfr slice" - } - - fn usage(&self) -> &str { - "Creates new dataframe from a slice of rows." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("offset", SyntaxShape::Int, "start of slice") - .required("size", SyntaxShape::Int, "size of slice") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Create new dataframe from a slice of the rows", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr slice 0 1", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(1)]), - Column::new("b".to_string(), vec![Value::test_int(2)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let offset: i64 = call.req(engine_state, stack, 0)?; - let size: usize = call.req(engine_state, stack, 1)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let res = df.as_ref().slice(offset, size); - - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, call.head), - None, - )) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(SliceDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_context.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/sql_context.rs deleted file mode 100644 index f558904344..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_context.rs +++ /dev/null @@ -1,228 +0,0 @@ -use crate::dataframe::eager::sql_expr::parse_sql_expr; -use polars::error::{ErrString, PolarsError}; -use polars::prelude::{col, DataFrame, DataType, IntoLazy, LazyFrame}; -use sqlparser::ast::{ - Expr as SqlExpr, GroupByExpr, Select, SelectItem, SetExpr, Statement, TableFactor, - Value as SQLValue, -}; -use sqlparser::dialect::GenericDialect; -use sqlparser::parser::Parser; -use std::collections::HashMap; - -#[derive(Default)] -pub struct SQLContext { - table_map: HashMap, - dialect: GenericDialect, -} - -impl SQLContext { - pub fn new() -> Self { - Self { - table_map: HashMap::new(), - dialect: GenericDialect, - } - } - - pub fn register(&mut self, name: &str, df: &DataFrame) { - self.table_map.insert(name.to_owned(), df.clone().lazy()); - } - - fn execute_select(&self, select_stmt: &Select) -> Result { - // Determine involved dataframe - // Implicit join require some more work in query parsers, Explicit join are preferred for now. - let tbl = select_stmt.from.first().ok_or_else(|| { - PolarsError::ComputeError(ErrString::from("No table found in select statement")) - })?; - let mut alias_map = HashMap::new(); - let tbl_name = match &tbl.relation { - TableFactor::Table { name, alias, .. } => { - let tbl_name = name - .0 - .first() - .ok_or_else(|| { - PolarsError::ComputeError(ErrString::from( - "No table found in select statement", - )) - })? 
- .value - .to_string(); - if self.table_map.contains_key(&tbl_name) { - if let Some(alias) = alias { - alias_map.insert(alias.name.value.clone(), tbl_name.to_owned()); - }; - tbl_name - } else { - return Err(PolarsError::ComputeError( - format!("Table name {tbl_name} was not found").into(), - )); - } - } - // Support bare table, optional with alias for now - _ => return Err(PolarsError::ComputeError("Not implemented".into())), - }; - let df = &self.table_map[&tbl_name]; - let mut raw_projection_before_alias: HashMap = HashMap::new(); - let mut contain_wildcard = false; - // Filter Expression - let df = match select_stmt.selection.as_ref() { - Some(expr) => { - let filter_expression = parse_sql_expr(expr)?; - df.clone().filter(filter_expression) - } - None => df.clone(), - }; - // Column Projections - let projection = select_stmt - .projection - .iter() - .enumerate() - .map(|(i, select_item)| { - Ok(match select_item { - SelectItem::UnnamedExpr(expr) => { - let expr = parse_sql_expr(expr)?; - raw_projection_before_alias.insert(format!("{expr:?}"), i); - expr - } - SelectItem::ExprWithAlias { expr, alias } => { - let expr = parse_sql_expr(expr)?; - raw_projection_before_alias.insert(format!("{expr:?}"), i); - expr.alias(&alias.value) - } - SelectItem::QualifiedWildcard(_, _) | SelectItem::Wildcard(_) => { - contain_wildcard = true; - col("*") - } - }) - }) - .collect::, PolarsError>>()?; - // Check for group by - // After projection since there might be number. - let group_by = match &select_stmt.group_by { - GroupByExpr::All => - Err( - PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported, not all".into()) - )?, - GroupByExpr::Expressions(expressions) => expressions - } - .iter() - .map( - |e|match e { - SqlExpr::Value(SQLValue::Number(idx, _)) => { - let idx = match idx.parse::() { - Ok(0)| Err(_) => Err( - PolarsError::ComputeError( - format!("Group-By Error: Only positive number or expression are supported, got {idx}").into() - )), - Ok(idx) => Ok(idx) - }?; - Ok(projection[idx].clone()) - } - SqlExpr::Value(_) => Err( - PolarsError::ComputeError("Group-By Error: Only positive number or expression are supported".into()) - ), - _ => parse_sql_expr(e) - } - ) - .collect::, PolarsError>>()?; - - let df = if group_by.is_empty() { - df.select(projection) - } else { - // check groupby and projection due to difference between SQL and polars - // Return error on wild card, shouldn't process this - if contain_wildcard { - return Err(PolarsError::ComputeError( - "Group-By Error: Can't process wildcard in group-by".into(), - )); - } - // Default polars group by will have group by columns at the front - // need some container to contain position of group by columns and its position - // at the final agg projection, check the schema for the existence of group by column - // and its projections columns, keeping the original index - let (exclude_expr, groupby_pos): (Vec<_>, Vec<_>) = group_by - .iter() - .map(|expr| raw_projection_before_alias.get(&format!("{expr:?}"))) - .enumerate() - .filter(|(_, proj_p)| proj_p.is_some()) - .map(|(gb_p, proj_p)| (*proj_p.unwrap_or(&0), (*proj_p.unwrap_or(&0), gb_p))) - .unzip(); - let (agg_projection, agg_proj_pos): (Vec<_>, Vec<_>) = projection - .iter() - .enumerate() - .filter(|(i, _)| !exclude_expr.contains(i)) - .enumerate() - .map(|(agg_pj, (proj_p, expr))| (expr.clone(), (proj_p, agg_pj + group_by.len()))) - .unzip(); - let agg_df = df.group_by(group_by).agg(agg_projection); - let mut final_proj_pos = groupby_pos - 
.into_iter() - .chain(agg_proj_pos) - .collect::>(); - - final_proj_pos.sort_by(|(proj_pa, _), (proj_pb, _)| proj_pa.cmp(proj_pb)); - let final_proj = final_proj_pos - .into_iter() - .map(|(_, shm_p)| { - col(agg_df - .clone() - // FIXME: had to do this mess to get get_index to work, not sure why. need help - .collect() - .unwrap_or_default() - .schema() - .get_at_index(shm_p) - .unwrap_or((&"".into(), &DataType::Null)) - .0) - }) - .collect::>(); - agg_df.select(final_proj) - }; - Ok(df) - } - - pub fn execute(&self, query: &str) -> Result { - let ast = Parser::parse_sql(&self.dialect, query) - .map_err(|e| PolarsError::ComputeError(format!("{e:?}").into()))?; - if ast.len() != 1 { - Err(PolarsError::ComputeError( - "One and only one statement at a time please".into(), - )) - } else { - let ast = ast - .first() - .ok_or_else(|| PolarsError::ComputeError(ErrString::from("No statement found")))?; - Ok(match ast { - Statement::Query(query) => { - let rs = match &*query.body { - SetExpr::Select(select_stmt) => self.execute_select(select_stmt)?, - _ => { - return Err(PolarsError::ComputeError( - "INSERT, UPDATE is not supported for polars".into(), - )) - } - }; - match &query.limit { - Some(SqlExpr::Value(SQLValue::Number(nrow, _))) => { - let nrow = nrow.parse().map_err(|err| { - PolarsError::ComputeError( - format!("Conversion Error: {err:?}").into(), - ) - })?; - rs.limit(nrow) - } - None => rs, - _ => { - return Err(PolarsError::ComputeError( - "Only support number argument to LIMIT clause".into(), - )) - } - } - } - _ => { - return Err(PolarsError::ComputeError( - format!("Statement type {ast:?} is not supported").into(), - )) - } - }) - } - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs deleted file mode 100644 index 9c0728ea5f..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/sql_expr.rs +++ /dev/null @@ -1,200 +0,0 @@ -use polars::error::PolarsError; -use polars::prelude::{col, lit, DataType, Expr, LiteralValue, PolarsResult as Result, TimeUnit}; - -use sqlparser::ast::{ - ArrayElemTypeDef, BinaryOperator as SQLBinaryOperator, DataType as SQLDataType, - Expr as SqlExpr, Function as SQLFunction, Value as SqlValue, WindowType, -}; - -fn map_sql_polars_datatype(data_type: &SQLDataType) -> Result { - Ok(match data_type { - SQLDataType::Char(_) - | SQLDataType::Varchar(_) - | SQLDataType::Uuid - | SQLDataType::Clob(_) - | SQLDataType::Text - | SQLDataType::String(_) => DataType::String, - SQLDataType::Float(_) => DataType::Float32, - SQLDataType::Real => DataType::Float32, - SQLDataType::Double => DataType::Float64, - SQLDataType::TinyInt(_) => DataType::Int8, - SQLDataType::UnsignedTinyInt(_) => DataType::UInt8, - SQLDataType::SmallInt(_) => DataType::Int16, - SQLDataType::UnsignedSmallInt(_) => DataType::UInt16, - SQLDataType::Int(_) => DataType::Int32, - SQLDataType::UnsignedInt(_) => DataType::UInt32, - SQLDataType::BigInt(_) => DataType::Int64, - SQLDataType::UnsignedBigInt(_) => DataType::UInt64, - - SQLDataType::Boolean => DataType::Boolean, - SQLDataType::Date => DataType::Date, - SQLDataType::Time(_, _) => DataType::Time, - SQLDataType::Timestamp(_, _) => DataType::Datetime(TimeUnit::Microseconds, None), - SQLDataType::Interval => DataType::Duration(TimeUnit::Microseconds), - SQLDataType::Array(array_type_def) => match array_type_def { - ArrayElemTypeDef::AngleBracket(inner_type) - | ArrayElemTypeDef::SquareBracket(inner_type) => { - 
DataType::List(Box::new(map_sql_polars_datatype(inner_type)?)) - } - _ => { - return Err(PolarsError::ComputeError( - "SQL Datatype Array(None) was not supported in polars-sql yet!".into(), - )) - } - }, - _ => { - return Err(PolarsError::ComputeError( - format!("SQL Datatype {data_type:?} was not supported in polars-sql yet!").into(), - )) - } - }) -} - -fn cast_(expr: Expr, data_type: &SQLDataType) -> Result { - let polars_type = map_sql_polars_datatype(data_type)?; - Ok(expr.cast(polars_type)) -} - -fn binary_op_(left: Expr, right: Expr, op: &SQLBinaryOperator) -> Result { - Ok(match op { - SQLBinaryOperator::Plus => left + right, - SQLBinaryOperator::Minus => left - right, - SQLBinaryOperator::Multiply => left * right, - SQLBinaryOperator::Divide => left / right, - SQLBinaryOperator::Modulo => left % right, - SQLBinaryOperator::StringConcat => { - left.cast(DataType::String) + right.cast(DataType::String) - } - SQLBinaryOperator::Gt => left.gt(right), - SQLBinaryOperator::Lt => left.lt(right), - SQLBinaryOperator::GtEq => left.gt_eq(right), - SQLBinaryOperator::LtEq => left.lt_eq(right), - SQLBinaryOperator::Eq => left.eq(right), - SQLBinaryOperator::NotEq => left.eq(right).not(), - SQLBinaryOperator::And => left.and(right), - SQLBinaryOperator::Or => left.or(right), - SQLBinaryOperator::Xor => left.xor(right), - _ => { - return Err(PolarsError::ComputeError( - format!("SQL Operator {op:?} was not supported in polars-sql yet!").into(), - )) - } - }) -} - -fn literal_expr(value: &SqlValue) -> Result { - Ok(match value { - SqlValue::Number(s, _) => { - // Check for existence of decimal separator dot - if s.contains('.') { - s.parse::().map(lit).map_err(|_| { - PolarsError::ComputeError(format!("Can't parse literal {s:?}").into()) - }) - } else { - s.parse::().map(lit).map_err(|_| { - PolarsError::ComputeError(format!("Can't parse literal {s:?}").into()) - }) - }? - } - SqlValue::SingleQuotedString(s) => lit(s.clone()), - SqlValue::NationalStringLiteral(s) => lit(s.clone()), - SqlValue::HexStringLiteral(s) => lit(s.clone()), - SqlValue::DoubleQuotedString(s) => lit(s.clone()), - SqlValue::Boolean(b) => lit(*b), - SqlValue::Null => Expr::Literal(LiteralValue::Null), - _ => { - return Err(PolarsError::ComputeError( - format!("Parsing SQL Value {value:?} was not supported in polars-sql yet!").into(), - )) - } - }) -} - -pub fn parse_sql_expr(expr: &SqlExpr) -> Result { - Ok(match expr { - SqlExpr::Identifier(e) => col(&e.value), - SqlExpr::BinaryOp { left, op, right } => { - let left = parse_sql_expr(left)?; - let right = parse_sql_expr(right)?; - binary_op_(left, right, op)? 
- } - SqlExpr::Function(sql_function) => parse_sql_function(sql_function)?, - SqlExpr::Cast { - expr, - data_type, - format: _, - } => cast_(parse_sql_expr(expr)?, data_type)?, - SqlExpr::Nested(expr) => parse_sql_expr(expr)?, - SqlExpr::Value(value) => literal_expr(value)?, - _ => { - return Err(PolarsError::ComputeError( - format!("Expression: {expr:?} was not supported in polars-sql yet!").into(), - )) - } - }) -} - -fn apply_window_spec(expr: Expr, window_type: Option<&WindowType>) -> Result { - Ok(match &window_type { - Some(wtype) => match wtype { - WindowType::WindowSpec(window_spec) => { - // Process for simple window specification, partition by first - let partition_by = window_spec - .partition_by - .iter() - .map(parse_sql_expr) - .collect::>>()?; - expr.over(partition_by) - // Order by and Row range may not be supported at the moment - } - // TODO: make NamedWindow work - WindowType::NamedWindow(_named) => { - return Err(PolarsError::ComputeError( - format!("Expression: {expr:?} was not supported in polars-sql yet!").into(), - )) - } - }, - None => expr, - }) -} - -fn parse_sql_function(sql_function: &SQLFunction) -> Result { - use sqlparser::ast::{FunctionArg, FunctionArgExpr}; - // Function name mostly do not have name space, so it mostly take the first args - let function_name = sql_function.name.0[0].value.to_ascii_lowercase(); - let args = sql_function - .args - .iter() - .map(|arg| match arg { - FunctionArg::Named { arg, .. } => arg, - FunctionArg::Unnamed(arg) => arg, - }) - .collect::>(); - Ok( - match ( - function_name.as_str(), - args.as_slice(), - sql_function.distinct, - ) { - ("sum", [FunctionArgExpr::Expr(expr)], false) => { - apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.sum() - } - ("count", [FunctionArgExpr::Expr(expr)], false) => { - apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.count() - } - ("count", [FunctionArgExpr::Expr(expr)], true) => { - apply_window_spec(parse_sql_expr(expr)?, sql_function.over.as_ref())?.n_unique() - } - // Special case for wildcard args to count function. - ("count", [FunctionArgExpr::Wildcard], false) => lit(1i32).count(), - _ => { - return Err(PolarsError::ComputeError( - format!( - "Function {function_name:?} with args {args:?} was not supported in polars-sql yet!" - ) - .into(), - )) - } - }, - ) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs deleted file mode 100644 index 845929a52d..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/summary.rs +++ /dev/null @@ -1,279 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::{ - chunked_array::ChunkedArray, - prelude::{ - AnyValue, DataFrame, DataType, Float64Type, IntoSeries, NewChunkedArray, - QuantileInterpolOptions, Series, StringType, - }, -}; - -#[derive(Clone)] -pub struct Summary; - -impl Command for Summary { - fn name(&self) -> &str { - "dfr summary" - } - - fn usage(&self) -> &str { - "For a dataframe, produces descriptive statistics (summary statistics) for its numeric columns." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .category(Category::Custom("dataframe".into())) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .named( - "quantiles", - SyntaxShape::Table(vec![]), - "provide optional quantiles", - Some('q'), - ) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "list dataframe descriptives", - example: "[[a b]; [1 1] [1 1]] | dfr into-df | dfr summary", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "descriptor".to_string(), - vec![ - Value::test_string("count"), - Value::test_string("sum"), - Value::test_string("mean"), - Value::test_string("median"), - Value::test_string("std"), - Value::test_string("min"), - Value::test_string("25%"), - Value::test_string("50%"), - Value::test_string("75%"), - Value::test_string("max"), - ], - ), - Column::new( - "a (i64)".to_string(), - vec![ - Value::test_float(2.0), - Value::test_float(2.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(0.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - ], - ), - Column::new( - "b (i64)".to_string(), - vec![ - Value::test_float(2.0), - Value::test_float(2.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(0.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - Value::test_float(1.0), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let quantiles: Option> = call.get_flag(engine_state, stack, "quantiles")?; - let quantiles = quantiles.map(|values| { - values - .iter() - .map(|value| { - let span = value.span(); - match value { - Value::Float { val, .. } => { - if (&0.0..=&1.0).contains(&val) { - Ok(*val) - } else { - Err(ShellError::GenericError { - error: "Incorrect value for quantile".into(), - msg: "value should be between 0 and 1".into(), - span: Some(span), - help: None, - inner: vec![], - }) - } - } - Value::Error { error, .. 
} => Err(*error.clone()), - _ => Err(ShellError::GenericError { - error: "Incorrect value for quantile".into(), - msg: "value should be a float".into(), - span: Some(span), - help: None, - inner: vec![], - }), - } - }) - .collect::, ShellError>>() - }); - - let quantiles = match quantiles { - Some(quantiles) => quantiles?, - None => vec![0.25, 0.50, 0.75], - }; - - let mut quantiles_labels = quantiles - .iter() - .map(|q| Some(format!("{}%", q * 100.0))) - .collect::>>(); - let mut labels = vec![ - Some("count".to_string()), - Some("sum".to_string()), - Some("mean".to_string()), - Some("median".to_string()), - Some("std".to_string()), - Some("min".to_string()), - ]; - labels.append(&mut quantiles_labels); - labels.push(Some("max".to_string())); - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let names = ChunkedArray::::from_slice_options("descriptor", &labels).into_series(); - - let head = std::iter::once(names); - - let tail = df - .as_ref() - .get_columns() - .iter() - .filter(|col| !matches!(col.dtype(), &DataType::Object("object", _))) - .map(|col| { - let count = col.len() as f64; - - let sum = col.sum_as_series().ok().and_then(|series| { - series - .cast(&DataType::Float64) - .ok() - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }); - - let mean = match col.mean_as_series().get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }; - - let median = match col.median_as_series() { - Ok(v) => match v.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }, - _ => None, - }; - - let std = match col.std_as_series(0) { - Ok(v) => match v.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }, - _ => None, - }; - - let min = col.min_as_series().ok().and_then(|series| { - series - .cast(&DataType::Float64) - .ok() - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }); - - let mut quantiles = quantiles - .clone() - .into_iter() - .map(|q| { - col.quantile_as_series(q, QuantileInterpolOptions::default()) - .ok() - .and_then(|ca| ca.cast(&DataType::Float64).ok()) - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }) - .collect::>>(); - - let max = col.max_as_series().ok().and_then(|series| { - series - .cast(&DataType::Float64) - .ok() - .and_then(|ca| match ca.get(0) { - Ok(AnyValue::Float64(v)) => Some(v), - _ => None, - }) - }); - - let mut descriptors = vec![Some(count), sum, mean, median, std, min]; - descriptors.append(&mut quantiles); - descriptors.push(max); - - let name = format!("{} ({})", col.name(), col.dtype()); - ChunkedArray::::from_slice_options(&name, &descriptors).into_series() - }); - - let res = head.chain(tail).collect::>(); - - DataFrame::new(res) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Summary {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs deleted file mode 100644 index 406dd1d624..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/take.rs +++ /dev/null @@ -1,148 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use 
nu_engine::command_prelude::*; - -use polars::prelude::DataType; - -#[derive(Clone)] -pub struct TakeDF; - -impl Command for TakeDF { - fn name(&self) -> &str { - "dfr take" - } - - fn usage(&self) -> &str { - "Creates new dataframe using the given indices." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "indices", - SyntaxShape::Any, - "list of indices used to take data", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Takes selected rows from dataframe", - example: r#"let df = ([[a b]; [4 1] [5 2] [4 3]] | dfr into-df); - let indices = ([0 2] | dfr into-df); - $df | dfr take $indices"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(4), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Takes selected rows from series", - example: r#"let series = ([4 1 5 2 4 3] | dfr into-df); - let indices = ([0 2] | dfr into-df); - $series | dfr take $indices"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(4), Value::test_int(5)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let index_value: Value = call.req(engine_state, stack, 0)?; - let index_span = index_value.span(); - let index = NuDataFrame::try_from_value(index_value)?.as_series(index_span)?; - - let casted = match index.dtype() { - DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => index - .cast(&DataType::UInt32) - .map_err(|e| ShellError::GenericError { - error: "Error casting index list".into(), - msg: e.to_string(), - span: Some(index_span), - help: None, - inner: vec![], - }), - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: "Series with incorrect type".into(), - span: Some(call.head), - help: Some("Consider using a Series with type int type".into()), - inner: vec![], - }), - }?; - - let indices = casted.u32().map_err(|e| ShellError::GenericError { - error: "Error casting index list".into(), - msg: e.to_string(), - span: Some(index_span), - help: None, - inner: vec![], - })?; - - NuDataFrame::try_from_pipeline(input, call.head).and_then(|df| { - df.as_ref() - .take(indices) - .map_err(|e| ShellError::GenericError { - error: "Error taking values".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - }) - .map(|df| PipelineData::Value(NuDataFrame::dataframe_into_value(df, call.head), None)) - }) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(TakeDF {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_arrow.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_arrow.rs deleted file mode 100644 index 
66f13121bf..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_arrow.rs +++ /dev/null @@ -1,79 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::prelude::{IpcWriter, SerWriter}; -use std::{fs::File, path::PathBuf}; - -#[derive(Clone)] -pub struct ToArrow; - -impl Command for ToArrow { - fn name(&self) -> &str { - "dfr to-arrow" - } - - fn usage(&self) -> &str { - "Saves dataframe to arrow file." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("file", SyntaxShape::Filepath, "file path to save dataframe") - .input_output_type(Type::Custom("dataframe".into()), Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Saves dataframe to arrow file", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-arrow test.arrow", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let file_name: Spanned = call.req(engine_state, stack, 0)?; - - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { - error: "Error with file name".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - IpcWriter::new(&mut file) - .finish(df.as_mut()) - .map_err(|e| ShellError::GenericError { - error: "Error saving file".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); - - Ok(PipelineData::Value( - Value::list(vec![file_value], call.head), - None, - )) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_avro.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_avro.rs deleted file mode 100644 index e5e5c6fae1..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_avro.rs +++ /dev/null @@ -1,109 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars_io::{ - avro::{AvroCompression, AvroWriter}, - SerWriter, -}; -use std::{fs::File, path::PathBuf}; - -#[derive(Clone)] -pub struct ToAvro; - -impl Command for ToAvro { - fn name(&self) -> &str { - "dfr to-avro" - } - - fn usage(&self) -> &str { - "Saves dataframe to avro file." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "compression", - SyntaxShape::String, - "use compression, supports deflate or snappy", - Some('c'), - ) - .required("file", SyntaxShape::Filepath, "file path to save dataframe") - .input_output_type(Type::Custom("dataframe".into()), Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Saves dataframe to avro file", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-avro test.avro", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn get_compression(call: &Call) -> Result, ShellError> { - if let Some((compression, span)) = call - .get_flag_expr("compression") - .and_then(|e| e.as_string().map(|s| (s, e.span))) - { - match compression.as_ref() { - "snappy" => Ok(Some(AvroCompression::Snappy)), - "deflate" => Ok(Some(AvroCompression::Deflate)), - _ => Err(ShellError::IncorrectValue { - msg: "compression must be one of deflate or snappy".to_string(), - val_span: span, - call_span: span, - }), - } - } else { - Ok(None) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let file_name: Spanned = call.req(engine_state, stack, 0)?; - let compression = get_compression(call)?; - - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { - error: "Error with file name".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - AvroWriter::new(file) - .with_compression(compression) - .finish(df.as_mut()) - .map_err(|e| ShellError::GenericError { - error: "Error saving file".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); - - Ok(PipelineData::Value( - Value::list(vec![file_value], call.head), - None, - )) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_csv.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_csv.rs deleted file mode 100644 index d85bed5150..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_csv.rs +++ /dev/null @@ -1,125 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::prelude::{CsvWriter, SerWriter}; -use std::{fs::File, path::PathBuf}; - -#[derive(Clone)] -pub struct ToCSV; - -impl Command for ToCSV { - fn name(&self) -> &str { - "dfr to-csv" - } - - fn usage(&self) -> &str { - "Saves dataframe to CSV file." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("file", SyntaxShape::Filepath, "file path to save dataframe") - .named( - "delimiter", - SyntaxShape::String, - "file delimiter character", - Some('d'), - ) - .switch("no-header", "Indicates if file doesn't have header", None) - .input_output_type(Type::Custom("dataframe".into()), Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Saves dataframe to CSV file", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv", - result: None, - }, - Example { - description: "Saves dataframe to CSV file using other delimiter", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-csv test.csv --delimiter '|'", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let file_name: Spanned = call.req(engine_state, stack, 0)?; - let delimiter: Option> = call.get_flag(engine_state, stack, "delimiter")?; - let no_header: bool = call.has_flag(engine_state, stack, "no-header")?; - - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { - error: "Error with file name".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - let writer = CsvWriter::new(&mut file); - - let writer = if no_header { - writer.include_header(false) - } else { - writer.include_header(true) - }; - - let mut writer = match delimiter { - None => writer, - Some(d) => { - if d.item.len() != 1 { - return Err(ShellError::GenericError { - error: "Incorrect delimiter".into(), - msg: "Delimiter has to be one char".into(), - span: Some(d.span), - help: None, - inner: vec![], - }); - } else { - let delimiter = match d.item.chars().next() { - Some(d) => d as u8, - None => unreachable!(), - }; - - writer.with_separator(delimiter) - } - } - }; - - writer - .finish(df.as_mut()) - .map_err(|e| ShellError::GenericError { - error: "Error writing to file".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); - - Ok(PipelineData::Value( - Value::list(vec![file_value], call.head), - None, - )) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs deleted file mode 100644 index d768c7a742..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_df.rs +++ /dev/null @@ -1,189 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuSchema}; -use nu_engine::command_prelude::*; - -use polars::prelude::*; - -#[derive(Clone)] -pub struct ToDataFrame; - -impl Command for ToDataFrame { - fn name(&self) -> &str { - "dfr into-df" - } - - fn usage(&self) -> &str { - "Converts a list, table or record into a dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "schema", - SyntaxShape::Record(vec![]), - r#"Polars Schema in format [{name: str}]. 
CSV, JSON, and JSONL files"#, - Some('s'), - ) - .input_output_type(Type::Any, Type::Custom("dataframe".into())) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Takes a dictionary and creates a dataframe", - example: "[[a b];[1 2] [3 4]] | dfr into-df", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Takes a list of tables and creates a dataframe", - example: "[[1 2 a] [3 4 b] [5 6 c]] | dfr into-df", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(3), Value::test_int(5)], - ), - Column::new( - "1".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "2".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Takes a list and creates a dataframe", - example: "[a b c] | dfr into-df", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Takes a list of booleans and creates a dataframe", - example: "[true true false] | dfr into-df", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Convert to a dataframe and provide a schema", - example: "{a: 1, b: {a: [1 2 3]}, c: [a b c]}| dfr into-df -s {a: u8, b: {a: list}, c: list}", - result: Some( - NuDataFrame::try_from_series(vec![ - Series::new("a", &[1u8]), - { - let dtype = DataType::Struct(vec![Field::new("a", DataType::List(Box::new(DataType::UInt64)))]); - let vals = vec![AnyValue::StructOwned( - Box::new((vec![AnyValue::List(Series::new("a", &[1u64, 2, 3]))], vec![Field::new("a", DataType::String)]))); 1]; - Series::from_any_values_and_dtype("b", &vals, &dtype, false) - .expect("Struct series should not fail") - }, - { - let dtype = DataType::List(Box::new(DataType::String)); - let vals = vec![AnyValue::List(Series::new("c", &["a", "b", "c"]))]; - Series::from_any_values_and_dtype("c", &vals, &dtype, false) - .expect("List series should not fail") - } - ], Span::test_data()) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Convert to a dataframe and provide a schema that adds a new column", - example: r#"[[a b]; [1 "foo"] [2 "bar"]] | dfr into-df -s {a: u8, b:str, c:i64} | dfr fill-null 3"#, - result: Some(NuDataFrame::try_from_series(vec![ - Series::new("a", [1u8, 2]), - Series::new("b", ["foo", "bar"]), - Series::new("c", [3i64, 3]), - ], Span::test_data()) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), 
- ), - } - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let maybe_schema = call - .get_flag(engine_state, stack, "schema")? - .map(|schema| NuSchema::try_from(&schema)) - .transpose()?; - - let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema.clone())?; - - Ok(PipelineData::Value( - NuDataFrame::into_value(df, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ToDataFrame {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_json_lines.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_json_lines.rs deleted file mode 100644 index 5875f17107..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_json_lines.rs +++ /dev/null @@ -1,80 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::prelude::{JsonWriter, SerWriter}; -use std::{fs::File, io::BufWriter, path::PathBuf}; - -#[derive(Clone)] -pub struct ToJsonLines; - -impl Command for ToJsonLines { - fn name(&self) -> &str { - "dfr to-jsonl" - } - - fn usage(&self) -> &str { - "Saves dataframe to a JSON lines file." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("file", SyntaxShape::Filepath, "file path to save dataframe") - .input_output_type(Type::Custom("dataframe".into()), Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Saves dataframe to JSON lines file", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-jsonl test.jsonl", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let file_name: Spanned = call.req(engine_state, stack, 0)?; - - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { - error: "Error with file name".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - let buf_writer = BufWriter::new(file); - - JsonWriter::new(buf_writer) - .finish(df.as_mut()) - .map_err(|e| ShellError::GenericError { - error: "Error saving file".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); - - Ok(PipelineData::Value( - Value::list(vec![file_value], call.head), - None, - )) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs deleted file mode 100644 index a6ab42052c..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_nu.rs +++ /dev/null @@ -1,136 +0,0 @@ -use crate::dataframe::values::{NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ToNu; - -impl Command for ToNu { - fn name(&self) -> &str { - "dfr into-nu" - } - - fn usage(&self) -> &str { - "Converts a dataframe or an expression into into nushell value for access and exploration." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "rows", - SyntaxShape::Number, - "number of rows to be shown", - Some('n'), - ) - .switch("tail", "shows tail rows", Some('t')) - .input_output_types(vec![ - (Type::Custom("expression".into()), Type::Any), - (Type::Custom("dataframe".into()), Type::table()), - ]) - //.input_output_type(Type::Any, Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - let rec_1 = Value::test_record(record! { - "index" => Value::test_int(0), - "a" => Value::test_int(1), - "b" => Value::test_int(2), - }); - let rec_2 = Value::test_record(record! { - "index" => Value::test_int(1), - "a" => Value::test_int(3), - "b" => Value::test_int(4), - }); - let rec_3 = Value::test_record(record! { - "index" => Value::test_int(2), - "a" => Value::test_int(3), - "b" => Value::test_int(4), - }); - - vec![ - Example { - description: "Shows head rows from dataframe", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr into-nu", - result: Some(Value::list(vec![rec_1, rec_2], Span::test_data())), - }, - Example { - description: "Shows tail rows from dataframe", - example: "[[a b]; [1 2] [5 6] [3 4]] | dfr into-df | dfr into-nu --tail --rows 1", - result: Some(Value::list(vec![rec_3], Span::test_data())), - }, - Example { - description: "Convert a col expression into a nushell value", - example: "dfr col a | dfr into-nu", - result: Some(Value::test_record(record! { - "expr" => Value::test_string("column"), - "value" => Value::test_string("a"), - })), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - dataframe_command(engine_state, stack, call, value) - } else { - expression_command(call, value) - } - } -} - -fn dataframe_command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: Value, -) -> Result { - let rows: Option = call.get_flag(engine_state, stack, "rows")?; - let tail: bool = call.has_flag(engine_state, stack, "tail")?; - - let df = NuDataFrame::try_from_value(input)?; - - let values = if tail { - df.tail(rows, call.head)? - } else { - // if rows is specified, return those rows, otherwise return everything - if rows.is_some() { - df.head(rows, call.head)? - } else { - df.head(Some(df.height()), call.head)? 
- } - }; - - let value = Value::list(values, call.head); - - Ok(PipelineData::Value(value, None)) -} -fn expression_command(call: &Call, input: Value) -> Result { - let expr = NuExpression::try_from_value(input)?; - let value = expr.to_value(call.head)?; - - Ok(PipelineData::Value(value, None)) -} - -#[cfg(test)] -mod test { - use super::super::super::expressions::ExprCol; - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples_dataframe_input() { - test_dataframe(vec![Box::new(ToNu {})]) - } - - #[test] - fn test_examples_expression_input() { - test_dataframe(vec![Box::new(ToNu {}), Box::new(ExprCol {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/to_parquet.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/to_parquet.rs deleted file mode 100644 index ce6419a9ac..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/to_parquet.rs +++ /dev/null @@ -1,79 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::prelude::ParquetWriter; -use std::{fs::File, path::PathBuf}; - -#[derive(Clone)] -pub struct ToParquet; - -impl Command for ToParquet { - fn name(&self) -> &str { - "dfr to-parquet" - } - - fn usage(&self) -> &str { - "Saves dataframe to parquet file." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("file", SyntaxShape::Filepath, "file path to save dataframe") - .input_output_type(Type::Custom("dataframe".into()), Type::Any) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Saves dataframe to parquet file", - example: "[[a b]; [1 2] [3 4]] | dfr into-df | dfr to-parquet test.parquet", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let file_name: Spanned = call.req(engine_state, stack, 0)?; - - let mut df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let file = File::create(&file_name.item).map_err(|e| ShellError::GenericError { - error: "Error with file name".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - ParquetWriter::new(file) - .finish(df.as_mut()) - .map_err(|e| ShellError::GenericError { - error: "Error saving file".into(), - msg: e.to_string(), - span: Some(file_name.span), - help: None, - inner: vec![], - })?; - - let file_value = Value::string(format!("saved {:?}", &file_name.item), file_name.span); - - Ok(PipelineData::Value( - Value::list(vec![file_value], call.head), - None, - )) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs b/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs deleted file mode 100644 index 79d3427e8a..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/eager/with_column.rs +++ /dev/null @@ -1,202 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct WithColumn; - -impl Command for WithColumn { - fn name(&self) -> &str { - "dfr with-column" - } - - fn usage(&self) -> &str { - "Adds a series to the dataframe." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named("name", SyntaxShape::String, "new column name", Some('n')) - .rest( - "series or expressions", - SyntaxShape::Any, - "series to be added or expressions used to define the new columns", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Adds a series to the dataframe", - example: r#"[[a b]; [1 2] [3 4]] - | dfr into-df - | dfr with-column ([5 6] | dfr into-df) --name c"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(5), Value::test_int(6)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Adds a series to the dataframe", - example: r#"[[a b]; [1 2] [3 4]] - | dfr into-lazy - | dfr with-column [ - ((dfr col a) * 2 | dfr as "c") - ((dfr col a) * 3 | dfr as "d") - ] - | dfr collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(2), Value::test_int(6)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(3), Value::test_int(9)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuLazyFrame::can_downcast(&value) { - let df = NuLazyFrame::try_from_value(value)?; - command_lazy(engine_state, stack, call, df) - } else if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - command_eager(engine_state, stack, call, df) - } else { - Err(ShellError::CantConvert { - to_type: "lazy or eager dataframe".into(), - from_type: value.get_type().to_string(), - span: value.span(), - help: None, - }) - } - } -} - -fn command_eager( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - mut df: NuDataFrame, -) -> Result { - let new_column: Value = call.req(engine_state, stack, 0)?; - let column_span = new_column.span(); - - if NuExpression::can_downcast(&new_column) { - let vals: Vec = call.rest(engine_state, stack, 0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(value)?; - let lazy = NuLazyFrame::new(true, df.lazy().with_columns(&expressions)); - - let df = lazy.collect(call.head)?; - - Ok(PipelineData::Value(df.into_value(call.head), None)) - } else { - let mut other = NuDataFrame::try_from_value(new_column)?.as_series(column_span)?; - - let name = match call.get_flag::(engine_state, stack, "name")? 
{ - Some(name) => name, - None => other.name().to_string(), - }; - - let series = other.rename(&name).clone(); - - df.as_mut() - .with_column(series) - .map_err(|e| ShellError::GenericError { - error: "Error adding column to dataframe".into(), - msg: e.to_string(), - span: Some(column_span), - help: None, - inner: vec![], - }) - .map(|df| { - PipelineData::Value( - NuDataFrame::dataframe_into_value(df.clone(), call.head), - None, - ) - }) - } -} - -fn command_lazy( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - lazy: NuLazyFrame, -) -> Result { - let vals: Vec = call.rest(engine_state, stack, 0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(value)?; - - let lazy: NuLazyFrame = lazy.into_polars().with_columns(&expressions).into(); - - Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head)?, - None, - )) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::expressions::ExprAlias; - use crate::dataframe::expressions::ExprCol; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(WithColumn {}), - Box::new(ExprAlias {}), - Box::new(ExprCol {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/alias.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/alias.rs deleted file mode 100644 index 9d36100276..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/alias.rs +++ /dev/null @@ -1,86 +0,0 @@ -use crate::dataframe::values::NuExpression; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ExprAlias; - -impl Command for ExprAlias { - fn name(&self) -> &str { - "dfr as" - } - - fn usage(&self) -> &str { - "Creates an alias expression." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "Alias name", - SyntaxShape::String, - "Alias name for the expression", - ) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Creates and alias expression", - example: "dfr col a | dfr as new_a | dfr into-nu", - result: { - let record = Value::test_record(record! { - "expr" => Value::test_record(record! 
{ - "expr" => Value::test_string("column"), - "value" => Value::test_string("a"), - }), - "alias" => Value::test_string("new_a"), - }); - - Some(record) - }, - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["aka", "abbr", "otherwise"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let alias: String = call.req(engine_state, stack, 0)?; - - let expr = NuExpression::try_from_pipeline(input, call.head)?; - let expr: NuExpression = expr.into_polars().alias(alias.as_str()).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::eager::ToNu; - use crate::dataframe::expressions::ExprCol; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(ExprAlias {}), - Box::new(ExprCol {}), - Box::new(ToNu {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs deleted file mode 100644 index 49c13c3f44..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/arg_where.rs +++ /dev/null @@ -1,78 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -use polars::prelude::arg_where; - -#[derive(Clone)] -pub struct ExprArgWhere; - -impl Command for ExprArgWhere { - fn name(&self) -> &str { - "dfr arg-where" - } - - fn usage(&self) -> &str { - "Creates an expression that returns the arguments where expression is true." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("column name", SyntaxShape::Any, "Expression to evaluate") - .input_output_type(Type::Any, Type::Custom("expression".into())) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Return a dataframe where the value match the expression", - example: "let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df); - $df | dfr select (dfr arg-where ((dfr col b) >= 2) | dfr as b_arg)", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "b_arg".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["condition", "match", "if"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - let value: Value = call.req(engine_state, stack, 0)?; - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = arg_where(expr.into_polars()).into(); - - Ok(PipelineData::Value(expr.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::expressions::ExprAlias; - use crate::dataframe::lazy::LazySelect; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(ExprArgWhere {}), - Box::new(ExprAlias {}), - Box::new(LazySelect {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/col.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/col.rs deleted file mode 100644 index 1520ef995d..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/col.rs +++ /dev/null @@ -1,68 +0,0 @@ -use 
crate::dataframe::values::NuExpression; -use nu_engine::command_prelude::*; - -use polars::prelude::col; - -#[derive(Clone)] -pub struct ExprCol; - -impl Command for ExprCol { - fn name(&self) -> &str { - "dfr col" - } - - fn usage(&self) -> &str { - "Creates a named column expression." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "column name", - SyntaxShape::String, - "Name of column to be used", - ) - .input_output_type(Type::Any, Type::Custom("expression".into())) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Creates a named column expression and converts it to a nu object", - example: "dfr col a | dfr into-nu", - result: Some(Value::test_record(record! { - "expr" => Value::test_string("column"), - "value" => Value::test_string("a"), - })), - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["create"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - let name: String = call.req(engine_state, stack, 0)?; - let expr: NuExpression = col(name.as_str()).into(); - - Ok(PipelineData::Value(expr.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::eager::ToNu; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ExprCol {}), Box::new(ToNu {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs deleted file mode 100644 index 28f9bbda71..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/concat_str.rs +++ /dev/null @@ -1,108 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -use polars::prelude::concat_str; - -#[derive(Clone)] -pub struct ExprConcatStr; - -impl Command for ExprConcatStr { - fn name(&self) -> &str { - "dfr concat-str" - } - - fn usage(&self) -> &str { - "Creates a concat string expression." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "separator", - SyntaxShape::String, - "Separator used during the concatenation", - ) - .required( - "concat expressions", - SyntaxShape::List(Box::new(SyntaxShape::Any)), - "Expression(s) that define the string concatenation", - ) - .input_output_type(Type::Any, Type::Custom("expression".into())) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Creates a concat string expression", - example: r#"let df = ([[a b c]; [one two 1] [three four 2]] | dfr into-df); - $df | dfr with-column ((dfr concat-str "-" [(dfr col a) (dfr col b) ((dfr col c) * 2)]) | dfr as concat)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("three")], - ), - Column::new( - "b".to_string(), - vec![Value::test_string("two"), Value::test_string("four")], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "concat".to_string(), - vec![ - Value::test_string("one-two-2"), - Value::test_string("three-four-4"), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["join", "connect", "update"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - let separator: String = call.req(engine_state, stack, 0)?; - let value: Value = call.req(engine_state, stack, 1)?; - - let expressions = NuExpression::extract_exprs(value)?; - let expr: NuExpression = concat_str(expressions, &separator, false).into(); - - Ok(PipelineData::Value(expr.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::eager::WithColumn; - use crate::dataframe::expressions::alias::ExprAlias; - use crate::dataframe::expressions::col::ExprCol; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(ExprConcatStr {}), - Box::new(ExprAlias {}), - Box::new(ExprCol {}), - Box::new(WithColumn {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs deleted file mode 100644 index 60913c0dc6..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/datepart.rs +++ /dev/null @@ -1,170 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use chrono::{DateTime, FixedOffset}; -use nu_engine::command_prelude::*; - -use polars::{ - datatypes::{DataType, TimeUnit}, - prelude::NamedFrom, - series::Series, -}; - -#[derive(Clone)] -pub struct ExprDatePart; - -impl Command for ExprDatePart { - fn name(&self) -> &str { - "dfr datepart" - } - - fn usage(&self) -> &str { - "Creates an expression for capturing the specified datepart in a column." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "Datepart name", - SyntaxShape::String, - "Part of the date to capture. 
Possible values are year, quarter, month, week, weekday, day, hour, minute, second, millisecond, microsecond, nanosecond", - ) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - let dt = DateTime::::parse_from_str( - "2021-12-30T01:02:03.123456789 +0000", - "%Y-%m-%dT%H:%M:%S.%9f %z", - ) - .expect("date calculation should not fail in test"); - vec![ - Example { - description: "Creates an expression to capture the year date part", - example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | dfr with-column [(dfr col datetime | dfr datepart year | dfr as datetime_year )]"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("datetime".to_string(), vec![Value::test_date(dt)]), - Column::new("datetime_year".to_string(), vec![Value::test_int(2021)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates an expression to capture multiple date parts", - example: r#"[["2021-12-30T01:02:03.123456789"]] | dfr into-df | dfr as-datetime "%Y-%m-%dT%H:%M:%S.%9f" | - dfr with-column [ (dfr col datetime | dfr datepart year | dfr as datetime_year ), - (dfr col datetime | dfr datepart month | dfr as datetime_month ), - (dfr col datetime | dfr datepart day | dfr as datetime_day ), - (dfr col datetime | dfr datepart hour | dfr as datetime_hour ), - (dfr col datetime | dfr datepart minute | dfr as datetime_minute ), - (dfr col datetime | dfr datepart second | dfr as datetime_second ), - (dfr col datetime | dfr datepart nanosecond | dfr as datetime_ns ) ]"#, - result: Some( - NuDataFrame::try_from_series( - vec![ - Series::new("datetime", &[dt.timestamp_nanos_opt()]) - .cast(&DataType::Datetime(TimeUnit::Nanoseconds, None)) - .expect("Error casting to datetime type"), - Series::new("datetime_year", &[2021_i64]), // i32 was coerced to i64 - Series::new("datetime_month", &[12_i8]), - Series::new("datetime_day", &[30_i8]), - Series::new("datetime_hour", &[1_i8]), - Series::new("datetime_minute", &[2_i8]), - Series::new("datetime_second", &[3_i8]), - Series::new("datetime_ns", &[123456789_i64]), // i32 was coerced to i64 - ], - Span::test_data(), - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn search_terms(&self) -> Vec<&str> { - vec![ - "year", - "month", - "week", - "weekday", - "quarter", - "day", - "hour", - "minute", - "second", - "millisecond", - "microsecond", - "nanosecond", - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let part: Spanned = call.req(engine_state, stack, 0)?; - - let expr = NuExpression::try_from_pipeline(input, call.head)?; - let expr_dt = expr.into_polars().dt(); - let expr = match part.item.as_str() { - "year" => expr_dt.year(), - "quarter" => expr_dt.quarter(), - "month" => expr_dt.month(), - "week" => expr_dt.week(), - "day" => expr_dt.day(), - "hour" => expr_dt.hour(), - "minute" => expr_dt.minute(), - "second" => expr_dt.second(), - "millisecond" => expr_dt.millisecond(), - "microsecond" => expr_dt.microsecond(), - "nanosecond" => expr_dt.nanosecond(), - _ => { - return Err(ShellError::UnsupportedInput { - msg: format!("{} is not a valid datepart, expected one of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond", 
part.item), - input: "value originates from here".to_string(), - msg_span: call.head, - input_span: part.span, - }); - } - }.into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::eager::ToNu; - use crate::dataframe::eager::WithColumn; - use crate::dataframe::expressions::ExprAlias; - use crate::dataframe::expressions::ExprCol; - use crate::dataframe::series::AsDateTime; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(ExprDatePart {}), - Box::new(ExprCol {}), - Box::new(ToNu {}), - Box::new(AsDateTime {}), - Box::new(WithColumn {}), - Box::new(ExprAlias {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs deleted file mode 100644 index 4cc56e030b..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/expressions_macro.rs +++ /dev/null @@ -1,736 +0,0 @@ -/// Definition of multiple Expression commands using a macro rule -/// All of these expressions have an identical body and only require -/// to have a change in the name, description and expression function -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -// The structs defined in this file are structs that form part of other commands -// since they share a similar name -macro_rules! expr_command { - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let expr = NuExpression::try_from_pipeline(input, call.head)?; - let expr: NuExpression = expr.into_polars().$func().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new($command {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]) - } - } - }; - - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let expr = NuExpression::try_from_pipeline(input, call.head)?; - let 
expr: NuExpression = expr.into_polars().$func($ddof).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new($command {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]) - } - } - }; -} - -// The structs defined in this file are structs that form part of other commands -// since they share a similar name -macro_rules! lazy_expr_command { - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let lazy = NuLazyFrame::try_from_value(value)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.into_polars() - .$func() - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - })?, - ); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().$func().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::{ - build_test_engine_state, test_dataframe_example, - }; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - // the first example should be a for the dataframe case - let example = &$command.examples()[0]; - let mut engine_state = build_test_engine_state(vec![Box::new($command {})]); - test_dataframe_example(&mut engine_state, &example) - } - - #[test] - fn test_examples_expressions() { - // the second example should be a for the dataframe case - let example = &$command.examples()[1]; - let mut engine_state = build_test_engine_state(vec![ - Box::new($command {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &example) - } - } - }; - - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddof: expr) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - 
.category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let lazy = NuLazyFrame::try_from_value(value)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.into_polars() - .$func($ddof) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - })?, - ); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().$func($ddof).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::{ - build_test_engine_state, test_dataframe_example, - }; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - // the first example should be a for the dataframe case - let example = &$command.examples()[0]; - let mut engine_state = build_test_engine_state(vec![Box::new($command {})]); - test_dataframe_example(&mut engine_state, &example) - } - - #[test] - fn test_examples_expressions() { - // the second example should be a for the dataframe case - let example = &$command.examples()[1]; - let mut engine_state = build_test_engine_state(vec![ - Box::new($command {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &example) - } - } - }; -} - -// ExprList command -// Expands to a command definition for a list expression -expr_command!( - ExprList, - "dfr implode", - "Aggregates a group to a Series.", - vec![Example { - description: "", - example: "", - result: None, - }], - implode, - test_implode -); - -// ExprAggGroups command -// Expands to a command definition for a agg groups expression -expr_command!( - ExprAggGroups, - "dfr agg-groups", - "Creates an agg_groups expression.", - vec![Example { - description: "", - example: "", - result: None, - }], - agg_groups, - test_groups -); - -// ExprCount command -// Expands to a command definition for a count expression -expr_command!( - ExprCount, - "dfr count", - "Creates a count expression.", - vec![Example { - description: "", - example: "", - result: None, - }], - count, - test_count -); - -// ExprNot command -// Expands to a command definition for a not expression -expr_command!( - ExprNot, - "dfr expr-not", - "Creates a not expression.", - vec![Example { - description: "Creates a not expression", - example: "(dfr col a) > 2) | dfr expr-not", - result: None, - },], - not, - test_not -); - -// ExprMax command -// Expands to a command definition for max aggregation -lazy_expr_command!( - ExprMax, - "dfr max", - "Creates a max expression or aggregates columns to their max value.", - vec![ - Example { - description: "Max value from columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr max", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(6)],), - Column::new("b".to_string(), vec![Value::test_int(4)],), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - 
description: "Max aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr max)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(4), Value::test_int(1)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ], - max, - test_max -); - -// ExprMin command -// Expands to a command definition for min aggregation -lazy_expr_command!( - ExprMin, - "dfr min", - "Creates a min expression or aggregates columns to their min value.", - vec![ - Example { - description: "Min value from columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr min", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(1)],), - Column::new("b".to_string(), vec![Value::test_int(1)],), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Min aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr min)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(1)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ], - min, - test_min -); - -// ExprSum command -// Expands to a command definition for sum aggregation -lazy_expr_command!( - ExprSum, - "dfr sum", - "Creates a sum expression for an aggregation or aggregates columns to their sum value.", - vec![ - Example { - description: "Sums all columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sum", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_int(11)],), - Column::new("b".to_string(), vec![Value::test_int(7)],), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Sum aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr sum)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(6), Value::test_int(1)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ], - sum, - test_sum -); - -// ExprMean command -// Expands to a command definition for mean aggregation -lazy_expr_command!( - ExprMean, - "dfr mean", - "Creates a mean expression for an aggregation or aggregates columns to their mean value.", - vec![ - Example { - description: "Mean value from columns in a dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr mean", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(2.0)],), - ], - None - ) - .expect("simple df for test should not fail") - 
.into_value(Span::test_data()), - ), - }, - Example { - description: "Mean aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr mean)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(3.0), Value::test_float(1.0)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ], - mean, - test_mean -); - -// ExprMedian command -// Expands to a command definition for median aggregation -expr_command!( - ExprMedian, - "dfr median", - "Creates a median expression for an aggregation.", - vec![Example { - description: "Median aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr median)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(3.0), Value::test_float(1.0)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - median, - test_median -); - -// ExprStd command -// Expands to a command definition for std aggregation -lazy_expr_command!( - ExprStd, - "dfr std", - "Creates a std expression for an aggregation of std value from columns in a dataframe.", - vec![ - Example { - description: "Std value from columns in a dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr std", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_float(2.0)],), - Column::new("b".to_string(), vec![Value::test_float(0.0)],), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Std aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr std)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(0.0), Value::test_float(0.0)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ], - std, - test_std, - 1 -); - -// ExprVar command -// Expands to a command definition for var aggregation -lazy_expr_command!( - ExprVar, - "dfr var", - "Create a var expression for an aggregation.", - vec![ - Example { - description: - "Var value from columns in a dataframe or aggregates columns to their var value", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr var", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(0.0)],), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Var aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 2] [two 1] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr var)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - 
vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(0.0), Value::test_float(0.0)], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ], - var, - test_var, - 1 -); diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs deleted file mode 100644 index 1579ba0e20..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/is_in.rs +++ /dev/null @@ -1,116 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -use polars::prelude::{lit, DataType}; - -#[derive(Clone)] -pub struct ExprIsIn; - -impl Command for ExprIsIn { - fn name(&self) -> &str { - "dfr is-in" - } - - fn usage(&self) -> &str { - "Creates an is-in expression." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "list", - SyntaxShape::List(Box::new(SyntaxShape::Any)), - "List to check if values are in", - ) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Creates a is-in expression", - example: r#"let df = ([[a b]; [one 1] [two 2] [three 3]] | dfr into-df); - $df | dfr with-column (dfr col a | dfr is-in [one two] | dfr as a_in)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_string("one"), - Value::test_string("two"), - Value::test_string("three"), - ], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], - ), - Column::new( - "a_in".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["check", "contained", "is-contain", "match"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let list: Vec = call.req(engine_state, stack, 0)?; - let expr = NuExpression::try_from_pipeline(input, call.head)?; - - let values = - NuDataFrame::try_from_columns(vec![Column::new("list".to_string(), list)], None)?; - let list = values.as_series(call.head)?; - - if matches!(list.dtype(), DataType::Object(..)) { - return Err(ShellError::IncompatibleParametersSingle { - msg: "Cannot use a mixed list as argument".into(), - span: call.head, - }); - } - - let expr: NuExpression = expr.into_polars().is_in(lit(list)).into(); - Ok(PipelineData::Value(expr.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::eager::WithColumn; - use crate::dataframe::expressions::alias::ExprAlias; - use crate::dataframe::expressions::col::ExprCol; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(ExprIsIn {}), - Box::new(ExprAlias {}), - Box::new(ExprCol {}), - Box::new(WithColumn {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/lit.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/lit.rs deleted file mode 100644 index 8610a59048..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/lit.rs +++ /dev/null @@ 
-1,69 +0,0 @@ -use crate::dataframe::values::NuExpression; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ExprLit; - -impl Command for ExprLit { - fn name(&self) -> &str { - "dfr lit" - } - - fn usage(&self) -> &str { - "Creates a literal expression." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "literal", - SyntaxShape::Any, - "literal to construct the expression", - ) - .input_output_type(Type::Any, Type::Custom("expression".into())) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Created a literal expression and converts it to a nu object", - example: "dfr lit 2 | dfr into-nu", - result: Some(Value::test_record(record! { - "expr" => Value::test_string("literal"), - "value" => Value::test_string("2"), - })), - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["string", "literal", "expression"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - let literal: Value = call.req(engine_state, stack, 0)?; - - let expr = NuExpression::try_from_value(literal)?; - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::eager::ToNu; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ExprLit {}), Box::new(ToNu {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs deleted file mode 100644 index 4ba70d900d..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/mod.rs +++ /dev/null @@ -1,62 +0,0 @@ -mod alias; -mod arg_where; -mod col; -mod concat_str; -mod datepart; -mod expressions_macro; -mod is_in; -mod lit; -mod otherwise; -mod quantile; -mod when; - -use nu_protocol::engine::StateWorkingSet; - -pub(crate) use crate::dataframe::expressions::alias::ExprAlias; -use crate::dataframe::expressions::arg_where::ExprArgWhere; -pub(super) use crate::dataframe::expressions::col::ExprCol; -pub(super) use crate::dataframe::expressions::concat_str::ExprConcatStr; -pub(crate) use crate::dataframe::expressions::datepart::ExprDatePart; -pub(crate) use crate::dataframe::expressions::expressions_macro::*; -pub(super) use crate::dataframe::expressions::is_in::ExprIsIn; -pub(super) use crate::dataframe::expressions::lit::ExprLit; -pub(super) use crate::dataframe::expressions::otherwise::ExprOtherwise; -pub(super) use crate::dataframe::expressions::quantile::ExprQuantile; -pub(super) use crate::dataframe::expressions::when::ExprWhen; - -pub fn add_expressions(working_set: &mut StateWorkingSet) { - macro_rules! 
bind_command { - ( $command:expr ) => { - working_set.add_decl(Box::new($command)); - }; - ( $( $command:expr ),* ) => { - $( working_set.add_decl(Box::new($command)); )* - }; - } - - // Dataframe commands - bind_command!( - ExprAlias, - ExprArgWhere, - ExprCol, - ExprConcatStr, - ExprCount, - ExprLit, - ExprWhen, - ExprOtherwise, - ExprQuantile, - ExprList, - ExprAggGroups, - ExprCount, - ExprIsIn, - ExprNot, - ExprMax, - ExprMin, - ExprSum, - ExprMean, - ExprMedian, - ExprStd, - ExprVar, - ExprDatePart - ); -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs deleted file mode 100644 index eb97c575b7..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/otherwise.rs +++ /dev/null @@ -1,126 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ExprOtherwise; - -impl Command for ExprOtherwise { - fn name(&self) -> &str { - "dfr otherwise" - } - - fn usage(&self) -> &str { - "Completes a when expression." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "otherwise expression", - SyntaxShape::Any, - "expression to apply when no when predicate matches", - ) - .input_output_type(Type::Any, Type::Custom("expression".into())) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create a when conditions", - example: "dfr when ((dfr col a) > 2) 4 | dfr otherwise 5", - result: None, - }, - Example { - description: "Create a when conditions", - example: - "dfr when ((dfr col a) > 2) 4 | dfr when ((dfr col a) < 0) 6 | dfr otherwise 0", - result: None, - }, - Example { - description: "Create a new column for the dataframe", - example: r#"[[a b]; [6 2] [1 4] [4 1]] - | dfr into-lazy - | dfr with-column ( - dfr when ((dfr col a) > 2) 4 | dfr otherwise 5 | dfr as c - ) - | dfr with-column ( - dfr when ((dfr col a) > 5) 10 | dfr when ((dfr col a) < 2) 6 | dfr otherwise 0 | dfr as d - ) - | dfr collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["condition", "else"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let otherwise_predicate: Value = call.req(engine_state, stack, 0)?; - let otherwise_predicate = NuExpression::try_from_value(otherwise_predicate)?; - - let value = input.into_value(call.head)?; - let complete: NuExpression = match NuWhen::try_from_value(value)? 
{ - NuWhen::Then(then) => then.otherwise(otherwise_predicate.into_polars()).into(), - NuWhen::ChainedThen(chained_when) => chained_when - .otherwise(otherwise_predicate.into_polars()) - .into(), - }; - - Ok(PipelineData::Value(complete.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use crate::dataframe::eager::{ToNu, WithColumn}; - use crate::dataframe::expressions::when::ExprWhen; - use crate::dataframe::expressions::{ExprAlias, ExprCol}; - - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(WithColumn {}), - Box::new(ExprCol {}), - Box::new(ExprAlias {}), - Box::new(ExprWhen {}), - Box::new(ExprOtherwise {}), - Box::new(ToNu {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs deleted file mode 100644 index aaa1029ee9..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/expressions/quantile.rs +++ /dev/null @@ -1,101 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -use polars::prelude::{lit, QuantileInterpolOptions}; - -#[derive(Clone)] -pub struct ExprQuantile; - -impl Command for ExprQuantile { - fn name(&self) -> &str { - "dfr quantile" - } - - fn usage(&self) -> &str { - "Aggregates the columns to the selected quantile." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "quantile", - SyntaxShape::Number, - "quantile value for quantile operation", - ) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Quantile aggregation for a group-by", - example: r#"[[a b]; [one 2] [one 4] [two 1]] - | dfr into-df - | dfr group-by a - | dfr agg (dfr col b | dfr quantile 0.5)"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_string("one"), Value::test_string("two")], - ), - Column::new( - "b".to_string(), - vec![Value::test_float(4.0), Value::test_float(1.0)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["statistics", "percentile", "distribution"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - let quantile: f64 = call.req(engine_state, stack, 0)?; - - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr - .into_polars() - .quantile(lit(quantile), QuantileInterpolOptions::default()) - .into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(ExprQuantile {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs b/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs deleted file mode 100644 index 5a6aad2de7..0000000000 --- 
a/crates/nu-cmd-dataframe/src/dataframe/expressions/when.rs +++ /dev/null @@ -1,147 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuWhen}; -use nu_engine::command_prelude::*; - -use polars::prelude::when; - -#[derive(Clone)] -pub struct ExprWhen; - -impl Command for ExprWhen { - fn name(&self) -> &str { - "dfr when" - } - - fn usage(&self) -> &str { - "Creates and modifies a when expression." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "when expression", - SyntaxShape::Any, - "when expression used for matching", - ) - .required( - "then expression", - SyntaxShape::Any, - "expression that will be applied when predicate is true", - ) - .input_output_type( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ) - .category(Category::Custom("expression".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create a when conditions", - example: "dfr when ((dfr col a) > 2) 4", - result: None, - }, - Example { - description: "Create a when conditions", - example: "dfr when ((dfr col a) > 2) 4 | dfr when ((dfr col a) < 0) 6", - result: None, - }, - Example { - description: "Create a new column for the dataframe", - example: r#"[[a b]; [6 2] [1 4] [4 1]] - | dfr into-lazy - | dfr with-column ( - dfr when ((dfr col a) > 2) 4 | dfr otherwise 5 | dfr as c - ) - | dfr with-column ( - dfr when ((dfr col a) > 5) 10 | dfr when ((dfr col a) < 2) 6 | dfr otherwise 0 | dfr as d - ) - | dfr collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(1), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(1)], - ), - Column::new( - "c".to_string(), - vec![Value::test_int(4), Value::test_int(5), Value::test_int(4)], - ), - Column::new( - "d".to_string(), - vec![Value::test_int(10), Value::test_int(6), Value::test_int(0)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn search_terms(&self) -> Vec<&str> { - vec!["condition", "match", "if", "else"] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let when_predicate: Value = call.req(engine_state, stack, 0)?; - let when_predicate = NuExpression::try_from_value(when_predicate)?; - - let then_predicate: Value = call.req(engine_state, stack, 1)?; - let then_predicate = NuExpression::try_from_value(then_predicate)?; - - let value = input.into_value(call.head)?; - let when_then: NuWhen = match value { - Value::Nothing { .. } => when(when_predicate.into_polars()) - .then(then_predicate.into_polars()) - .into(), - v => match NuWhen::try_from_value(v)? 
{ - NuWhen::Then(when_then) => when_then - .when(when_predicate.into_polars()) - .then(then_predicate.into_polars()) - .into(), - NuWhen::ChainedThen(when_then_then) => when_then_then - .when(when_predicate.into_polars()) - .then(then_predicate.into_polars()) - .into(), - }, - }; - - Ok(PipelineData::Value(when_then.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use crate::dataframe::eager::{ToNu, WithColumn}; - use crate::dataframe::expressions::otherwise::ExprOtherwise; - use crate::dataframe::expressions::{ExprAlias, ExprCol}; - - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(WithColumn {}), - Box::new(ExprCol {}), - Box::new(ExprAlias {}), - Box::new(ExprWhen {}), - Box::new(ExprOtherwise {}), - Box::new(ToNu {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs deleted file mode 100644 index 715c3d156b..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/aggregate.rs +++ /dev/null @@ -1,216 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy}; -use nu_engine::command_prelude::*; - -use polars::{datatypes::DataType, prelude::Expr}; - -#[derive(Clone)] -pub struct LazyAggregate; - -impl Command for LazyAggregate { - fn name(&self) -> &str { - "dfr agg" - } - - fn usage(&self) -> &str { - "Performs a series of aggregations from a group-by." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest( - "Group-by expressions", - SyntaxShape::Any, - "Expression(s) that define the aggregations to be applied", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Group by and perform an aggregation", - example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | dfr into-df - | dfr group-by a - | dfr agg [ - (dfr col b | dfr min | dfr as "b_min") - (dfr col b | dfr max | dfr as "b_max") - (dfr col b | dfr sum | dfr as "b_sum") - ]"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Group by and perform an aggregation", - example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | dfr into-lazy - | dfr group-by a - | dfr agg [ - (dfr col b | dfr min | dfr as "b_min") - (dfr col b | dfr max | dfr as "b_max") - (dfr col b | dfr sum | dfr as "b_sum") - ] - | dfr collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - 
.into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let vals: Vec = call.rest(engine_state, stack, 0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(value)?; - - let group_by = NuLazyGroupBy::try_from_pipeline(input, call.head)?; - - if let Some(schema) = &group_by.schema { - for expr in expressions.iter() { - if let Some(name) = get_col_name(expr) { - let dtype = schema.get(name.as_str()); - - if matches!(dtype, Some(DataType::Object(..))) { - return Err(ShellError::GenericError { - error: "Object type column not supported for aggregation".into(), - msg: format!("Column '{name}' is type Object"), - span: Some(call.head), - help: Some("Aggregations cannot be performed on Object type columns. Use dtype command to check column types".into()), - inner: vec![], - }); - } - } - } - } - - let lazy = NuLazyFrame { - from_eager: group_by.from_eager, - lazy: Some(group_by.into_polars().agg(&expressions)), - schema: None, - }; - - let res = lazy.into_value(call.head)?; - Ok(PipelineData::Value(res, None)) - } -} - -fn get_col_name(expr: &Expr) -> Option { - match expr { - Expr::Column(column) => Some(column.to_string()), - Expr::Agg(agg) => match agg { - polars::prelude::AggExpr::Min { input: e, .. } - | polars::prelude::AggExpr::Max { input: e, .. } - | polars::prelude::AggExpr::Median(e) - | polars::prelude::AggExpr::NUnique(e) - | polars::prelude::AggExpr::First(e) - | polars::prelude::AggExpr::Last(e) - | polars::prelude::AggExpr::Mean(e) - | polars::prelude::AggExpr::Implode(e) - | polars::prelude::AggExpr::Count(e, _) - | polars::prelude::AggExpr::Sum(e) - | polars::prelude::AggExpr::AggGroups(e) - | polars::prelude::AggExpr::Std(e, _) - | polars::prelude::AggExpr::Var(e, _) => get_col_name(e.as_ref()), - polars::prelude::AggExpr::Quantile { expr, .. } => get_col_name(expr.as_ref()), - }, - Expr::Filter { input: expr, .. } - | Expr::Slice { input: expr, .. } - | Expr::Cast { expr, .. } - | Expr::Sort { expr, .. } - | Expr::Gather { expr, .. } - | Expr::SortBy { expr, .. } - | Expr::Exclude(expr, _) - | Expr::Alias(expr, _) - | Expr::KeepName(expr) - | Expr::Explode(expr) => get_col_name(expr.as_ref()), - Expr::Ternary { .. } - | Expr::AnonymousFunction { .. } - | Expr::Function { .. } - | Expr::Columns(_) - | Expr::DtypeColumn(_) - | Expr::Literal(_) - | Expr::BinaryExpr { .. } - | Expr::Window { .. } - | Expr::Wildcard - | Expr::RenameAlias { .. 
} - | Expr::Len - | Expr::Nth(_) - | Expr::SubPlan(_, _) - | Expr::Selector(_) => None, - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::expressions::{ExprAlias, ExprMax, ExprMin, ExprSum}; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - Box::new(ExprAlias {}), - Box::new(ExprMin {}), - Box::new(ExprMax {}), - Box::new(ExprSum {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs deleted file mode 100644 index c27591cc1d..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/collect.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyCollect; - -impl Command for LazyCollect { - fn name(&self) -> &str { - "dfr collect" - } - - fn usage(&self) -> &str { - "Collect lazy dataframe into eager dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "drop duplicates", - example: "[[a b]; [1 2] [3 4]] | dfr into-lazy | dfr collect", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(3)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let eager = lazy.collect(call.head)?; - let value = Value::custom(Box::new(eager), call.head); - - Ok(PipelineData::Value(value, None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyCollect {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs deleted file mode 100644 index a027e84d36..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/explode.rs +++ /dev/null @@ -1,153 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyExplode; - -impl Command for LazyExplode { - fn name(&self) -> &str { - "dfr explode" - } - - fn usage(&self) -> &str { - "Explodes a dataframe or creates a explode expression." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest( - "columns", - SyntaxShape::String, - "columns to explode, only applicable for dataframes", - ) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Explode the specified dataframe", - example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr explode hobbies | dfr collect", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "id".to_string(), - vec![ - Value::test_int(1), - Value::test_int(1), - Value::test_int(2), - Value::test_int(2), - ]), - Column::new( - "name".to_string(), - vec![ - Value::test_string("Mercy"), - Value::test_string("Mercy"), - Value::test_string("Bob"), - Value::test_string("Bob"), - ]), - Column::new( - "hobbies".to_string(), - vec![ - Value::test_string("Cycling"), - Value::test_string("Knitting"), - Value::test_string("Skiing"), - Value::test_string("Football"), - ]), - ], None).expect("simple df for test should not fail") - .into_value(Span::test_data()), - ) - }, - Example { - description: "Select a column and explode the values", - example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr select (dfr col hobbies | dfr explode)", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "hobbies".to_string(), - vec![ - Value::test_string("Cycling"), - Value::test_string("Knitting"), - Value::test_string("Skiing"), - Value::test_string("Football"), - ]), - ], None).expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - explode(call, input) - } -} - -pub(crate) fn explode(call: &Call, input: PipelineData) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let df = NuLazyFrame::try_from_value(value)?; - let columns: Vec = call - .positional_iter() - .filter_map(|e| e.as_string()) - .collect(); - - let exploded = df - .into_polars() - .explode(columns.iter().map(AsRef::as_ref).collect::>()); - - Ok(PipelineData::Value( - NuLazyFrame::from(exploded).into_value(call.head)?, - None, - )) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().explode().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(LazyExplode {})]); - test_dataframe_example(&mut engine_state, &LazyExplode.examples()[0]); - } - - #[ignore] - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - Box::new(LazyExplode {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &LazyExplode.examples()[1]); - } -} diff --git 
a/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs deleted file mode 100644 index 6ba75aa970..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fetch.rs +++ /dev/null @@ -1,92 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyFetch; - -impl Command for LazyFetch { - fn name(&self) -> &str { - "dfr fetch" - } - - fn usage(&self) -> &str { - "Collects the lazyframe to the selected rows." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "rows", - SyntaxShape::Int, - "number of rows to be fetched from lazyframe", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Fetch a rows from the dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr fetch 2", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let rows: i64 = call.req(engine_state, stack, 0)?; - - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let eager: NuDataFrame = lazy - .into_polars() - .fetch(rows as usize) - .map_err(|e| ShellError::GenericError { - error: "Error fetching rows".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into(); - - Ok(PipelineData::Value( - NuDataFrame::into_value(eager, call.head), - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyFetch {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs deleted file mode 100644 index 4c75f1d9a3..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_nan.rs +++ /dev/null @@ -1,143 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyFillNA; - -impl Command for LazyFillNA { - fn name(&self) -> &str { - "dfr fill-nan" - } - - fn usage(&self) -> &str { - "Replaces NaN values with the given expression." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "fill", - SyntaxShape::Any, - "Expression to use to fill the NAN values", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Fills the NaN values with 0", - example: "[1 2 NaN 3 NaN] | dfr into-df | dfr fill-nan 0", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(0), - Value::test_int(3), - Value::test_int(0), - ], - )], - None, - ) - .expect("Df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Fills the NaN values of a whole dataframe", - example: "[[a b]; [0.2 1] [0.1 NaN]] | dfr into-df | dfr fill-nan 0", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_float(0.2), Value::test_float(0.1)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(1), Value::test_int(0)], - ), - ], - None, - ) - .expect("Df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let fill: Value = call.req(engine_state, stack, 0)?; - let value = input.into_value(call.head)?; - - if NuExpression::can_downcast(&value) { - let expr = NuExpression::try_from_value(value)?; - let fill = NuExpression::try_from_value(fill)?.into_polars(); - let expr: NuExpression = expr.into_polars().fill_nan(fill).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } else { - let val_span = value.span(); - let frame = NuDataFrame::try_from_value(value)?; - let columns = frame.columns(val_span)?; - let dataframe = columns - .into_iter() - .map(|column| { - let column_name = column.name().to_string(); - let values = column - .into_iter() - .map(|value| { - let span = value.span(); - match value { - Value::Float { val, .. } => { - if val.is_nan() { - fill.clone() - } else { - value - } - } - Value::List { vals, .. } => { - NuDataFrame::fill_list_nan(vals, span, fill.clone()) - } - _ => value, - } - }) - .collect::>(); - Column::new(column_name, values) - }) - .collect::>(); - Ok(PipelineData::Value( - NuDataFrame::try_from_columns(dataframe, None)?.into_value(call.head), - None, - )) - } - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyFillNA {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs deleted file mode 100644 index 88be2a9e88..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/fill_null.rs +++ /dev/null @@ -1,93 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyFillNull; - -impl Command for LazyFillNull { - fn name(&self) -> &str { - "dfr fill-null" - } - - fn usage(&self) -> &str { - "Replaces NULL values with the given expression." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "fill", - SyntaxShape::Any, - "Expression to use to fill the null values", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Fills the null values by 0", - example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr fill-null 0", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(0), - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(2), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let fill: Value = call.req(engine_state, stack, 0)?; - let value = input.into_value(call.head)?; - - if NuExpression::can_downcast(&value) { - let expr = NuExpression::try_from_value(value)?; - let fill = NuExpression::try_from_value(fill)?.into_polars(); - let expr: NuExpression = expr.into_polars().fill_null(fill).into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } else { - let lazy = NuLazyFrame::try_from_value(value)?; - let expr = NuExpression::try_from_value(fill)?.into_polars(); - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().fill_null(expr)); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } - } -} - -#[cfg(test)] -mod test { - use super::super::super::series::Shift; - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyFillNull {}), Box::new(Shift {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs deleted file mode 100644 index 5635a77e88..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/filter.rs +++ /dev/null @@ -1,83 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyFilter; - -impl Command for LazyFilter { - fn name(&self) -> &str { - "dfr filter" - } - - fn usage(&self) -> &str { - "Filter dataframe based in expression." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "filter expression", - SyntaxShape::Any, - "Expression that define the column selection", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Filter dataframe using an expression", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr filter ((dfr col a) >= 4)", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(4)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value: Value = call.req(engine_state, stack, 0)?; - let expression = NuExpression::try_from_value(value)?; - - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.into_polars().filter(expression.into_polars()), - ); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyFilter {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs deleted file mode 100644 index 602dcbcee3..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/flatten.rs +++ /dev/null @@ -1,126 +0,0 @@ -use super::explode::explode; -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazyFlatten; - -impl Command for LazyFlatten { - fn name(&self) -> &str { - "dfr flatten" - } - - fn usage(&self) -> &str { - "An alias for dfr explode." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest( - "columns", - SyntaxShape::String, - "columns to flatten, only applicable for dataframes", - ) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ -Example { - description: "Flatten the specified dataframe", - example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr flatten hobbies | dfr collect", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "id".to_string(), - vec![ - Value::test_int(1), - Value::test_int(1), - Value::test_int(2), - Value::test_int(2), - ]), - Column::new( - "name".to_string(), - vec![ - Value::test_string("Mercy"), - Value::test_string("Mercy"), - Value::test_string("Bob"), - Value::test_string("Bob"), - ]), - Column::new( - "hobbies".to_string(), - vec![ - Value::test_string("Cycling"), - Value::test_string("Knitting"), - Value::test_string("Skiing"), - Value::test_string("Football"), - ]), - ], None).expect("simple df for test should not fail") - .into_value(Span::test_data()), - ) - }, - Example { - description: "Select a column and flatten the values", - example: "[[id name hobbies]; [1 Mercy [Cycling Knitting]] [2 Bob [Skiing Football]]] | dfr into-df | dfr select (dfr col hobbies | dfr flatten)", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "hobbies".to_string(), - vec![ - Value::test_string("Cycling"), - Value::test_string("Knitting"), - Value::test_string("Skiing"), - Value::test_string("Football"), - ]), - ], None).expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - explode(call, input) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(LazyFlatten {})]); - test_dataframe_example(&mut engine_state, &LazyFlatten.examples()[0]); - } - - #[ignore] - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - Box::new(LazyFlatten {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &LazyFlatten.examples()[1]); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs deleted file mode 100644 index c31d563eb6..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/groupby.rs +++ /dev/null @@ -1,161 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame, NuLazyGroupBy}; -use nu_engine::command_prelude::*; - -use polars::prelude::Expr; - -#[derive(Clone)] -pub struct ToLazyGroupBy; - -impl Command for ToLazyGroupBy { - fn name(&self) -> &str { - "dfr group-by" - } - - fn usage(&self) -> &str { - "Creates a group-by object that can be used for other aggregations." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest( - "Group-by expressions", - SyntaxShape::Any, - "Expression(s) that define the lazy group-by", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Group by and perform an aggregation", - example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | dfr into-df - | dfr group-by a - | dfr agg [ - (dfr col b | dfr min | dfr as "b_min") - (dfr col b | dfr max | dfr as "b_max") - (dfr col b | dfr sum | dfr as "b_sum") - ]"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Group by and perform an aggregation", - example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]] - | dfr into-lazy - | dfr group-by a - | dfr agg [ - (dfr col b | dfr min | dfr as "b_min") - (dfr col b | dfr max | dfr as "b_max") - (dfr col b | dfr sum | dfr as "b_sum") - ] - | dfr collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(2)], - ), - Column::new( - "b_min".to_string(), - vec![Value::test_int(2), Value::test_int(4)], - ), - Column::new( - "b_max".to_string(), - vec![Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b_sum".to_string(), - vec![Value::test_int(6), Value::test_int(10)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let vals: Vec = call.rest(engine_state, stack, 0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(value)?; - - if expressions - .iter() - .any(|expr| !matches!(expr, Expr::Column(..))) - { - let value: Value = call.req(engine_state, stack, 0)?; - return Err(ShellError::IncompatibleParametersSingle { - msg: "Expected only Col expressions".into(), - span: value.span(), - }); - } - - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let group_by = NuLazyGroupBy { - schema: lazy.schema.clone(), - from_eager: lazy.from_eager, - group_by: Some(lazy.into_polars().group_by(&expressions)), - }; - - Ok(PipelineData::Value(group_by.into_value(call.head), None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - use crate::dataframe::expressions::{ExprAlias, ExprMax, ExprMin, ExprSum}; - use crate::dataframe::lazy::aggregate::LazyAggregate; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - Box::new(ExprAlias {}), - Box::new(ExprMin {}), - Box::new(ExprMax {}), - Box::new(ExprSum {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs deleted file mode 100644 index 4ae297acfd..0000000000 --- 
a/crates/nu-cmd-dataframe/src/dataframe/lazy/join.rs +++ /dev/null @@ -1,252 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{Expr, JoinType}; - -#[derive(Clone)] -pub struct LazyJoin; - -impl Command for LazyJoin { - fn name(&self) -> &str { - "dfr join" - } - - fn usage(&self) -> &str { - "Joins a lazy frame with other lazy frame." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("other", SyntaxShape::Any, "LazyFrame to join with") - .required("left_on", SyntaxShape::Any, "Left column(s) to join on") - .required("right_on", SyntaxShape::Any, "Right column(s) to join on") - .switch( - "inner", - "inner join between lazyframes (default)", - Some('i'), - ) - .switch("left", "left join between lazyframes", Some('l')) - .switch("outer", "outer join between lazyframes", Some('o')) - .switch("cross", "cross join between lazyframes", Some('c')) - .named( - "suffix", - SyntaxShape::String, - "Suffix to use on columns with same name", - Some('s'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Join two lazy dataframes", - example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr into-lazy); - let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy); - $df_a | dfr join $df_b a foo | dfr collect"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(1), - Value::test_int(1), - ], - ), - Column::new( - "b".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - Value::test_string("c"), - ], - ), - Column::new( - "c".to_string(), - vec![ - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - ], - ), - Column::new( - "bar".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("c"), - Value::test_string("a"), - Value::test_string("a"), - ], - ), - Column::new( - "ham".to_string(), - vec![ - Value::test_string("let"), - Value::test_string("var"), - Value::test_string("let"), - Value::test_string("let"), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Join one eager dataframe with a lazy dataframe", - example: r#"let df_a = ([[a b c];[1 "a" 0] [2 "b" 1] [1 "c" 2] [1 "c" 3]] | dfr into-df); - let df_b = ([["foo" "bar" "ham"];[1 "a" "let"] [2 "c" "var"] [3 "c" "const"]] | dfr into-lazy); - $df_a | dfr join $df_b a foo"#, - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - Value::test_int(2), - Value::test_int(1), - Value::test_int(1), - ], - ), - Column::new( - "b".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("b"), - Value::test_string("c"), - Value::test_string("c"), - ], - ), - Column::new( - "c".to_string(), - vec![ - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - ], - ), - Column::new( - "bar".to_string(), - vec![ - Value::test_string("a"), - Value::test_string("c"), - Value::test_string("a"), - Value::test_string("a"), - ], - ), - Column::new( - "ham".to_string(), - vec![ - Value::test_string("let"), 
- Value::test_string("var"), - Value::test_string("let"), - Value::test_string("let"), - ], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let left = call.has_flag(engine_state, stack, "left")?; - let outer = call.has_flag(engine_state, stack, "outer")?; - let cross = call.has_flag(engine_state, stack, "cross")?; - - let how = if left { - JoinType::Left - } else if outer { - JoinType::Outer { coalesce: true } - } else if cross { - JoinType::Cross - } else { - JoinType::Inner - }; - - let other: Value = call.req(engine_state, stack, 0)?; - let other = NuLazyFrame::try_from_value(other)?; - let other = other.into_polars(); - - let left_on: Value = call.req(engine_state, stack, 1)?; - let left_on = NuExpression::extract_exprs(left_on)?; - - let right_on: Value = call.req(engine_state, stack, 2)?; - let right_on = NuExpression::extract_exprs(right_on)?; - - if left_on.len() != right_on.len() { - let right_on: Value = call.req(engine_state, stack, 2)?; - return Err(ShellError::IncompatibleParametersSingle { - msg: "The right column list has a different size to the left column list".into(), - span: right_on.span(), - }); - } - - // Checking that both list of expressions are made out of col expressions or strings - for (index, list) in &[(1usize, &left_on), (2, &left_on)] { - if list.iter().any(|expr| !matches!(expr, Expr::Column(..))) { - let value: Value = call.req(engine_state, stack, *index)?; - return Err(ShellError::IncompatibleParametersSingle { - msg: "Expected only a string, col expressions or list of strings".into(), - span: value.span(), - }); - } - } - - let suffix: Option = call.get_flag(engine_state, stack, "suffix")?; - let suffix = suffix.unwrap_or_else(|| "_x".into()); - - let value = input.into_value(call.head)?; - let lazy = NuLazyFrame::try_from_value(value)?; - let from_eager = lazy.from_eager; - let lazy = lazy.into_polars(); - - let lazy = lazy - .join_builder() - .with(other) - .left_on(left_on) - .right_on(right_on) - .how(how) - .force_parallel(true) - .suffix(suffix) - .finish(); - - let lazy = NuLazyFrame::new(from_eager, lazy); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyJoin {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs deleted file mode 100644 index 89655b8e3f..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/macro_commands.rs +++ /dev/null @@ -1,246 +0,0 @@ -/// Definition of multiple lazyframe commands using a macro rule -/// All of these commands have an identical body and only require -/// to have a change in the name, description and function -use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame}; -use nu_engine::command_prelude::*; - -macro_rules! 
lazy_command { - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func()); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new($command {})]) - } - } - }; - - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident, $test: ident, $ddot: expr) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().$func($ddot)); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new($command {})]) - } - } - }; - - ($command: ident, $name: expr, $desc: expr, $examples: expr, $func: ident?, $test: ident) => { - #[derive(Clone)] - pub struct $command; - - impl Command for $command { - fn name(&self) -> &str { - $name - } - - fn usage(&self) -> &str { - $desc - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - $examples - } - - fn run( - &self, - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.into_polars() - .$func() - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - })?, - ); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } - } - - #[cfg(test)] - mod $test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new($command {})]) - } - } - }; -} - -// LazyReverse command -// Expands to a command definition for reverse -lazy_command!( - LazyReverse, - "dfr reverse", - "Reverses the LazyFrame", - vec![Example { 
- description: "Reverses the dataframe.", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(2), Value::test_int(4), Value::test_int(6),], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(2), Value::test_int(2), Value::test_int(2),], - ), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - reverse, - test_reverse -); - -// LazyCache command -// Expands to a command definition for cache -lazy_command!( - LazyCache, - "dfr cache", - "Caches operations in a new LazyFrame.", - vec![Example { - description: "Caches the result into a new LazyFrame", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr reverse | dfr cache", - result: None, - }], - cache, - test_cache -); - -// LazyMedian command -// Expands to a command definition for median aggregation -lazy_command!( - LazyMedian, - "dfr median", - "Aggregates columns to their median value", - vec![Example { - description: "Median value from columns in a dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr median", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)],), - Column::new("b".to_string(), vec![Value::test_float(2.0)],), - ], - None - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - },], - median?, - test_median -); diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs deleted file mode 100644 index cbbc4e8589..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/mod.rs +++ /dev/null @@ -1,65 +0,0 @@ -pub mod aggregate; -mod collect; -mod explode; -mod fetch; -mod fill_nan; -mod fill_null; -mod filter; -mod flatten; -pub mod groupby; -mod join; -mod macro_commands; -mod quantile; -mod select; -mod sort_by_expr; -mod to_lazy; - -use nu_protocol::engine::StateWorkingSet; - -use crate::dataframe::lazy::aggregate::LazyAggregate; -pub use crate::dataframe::lazy::collect::LazyCollect; -use crate::dataframe::lazy::fetch::LazyFetch; -use crate::dataframe::lazy::fill_nan::LazyFillNA; -pub use crate::dataframe::lazy::fill_null::LazyFillNull; -use crate::dataframe::lazy::filter::LazyFilter; -use crate::dataframe::lazy::groupby::ToLazyGroupBy; -use crate::dataframe::lazy::join::LazyJoin; -pub(crate) use crate::dataframe::lazy::macro_commands::*; -use crate::dataframe::lazy::quantile::LazyQuantile; -pub(crate) use crate::dataframe::lazy::select::LazySelect; -use crate::dataframe::lazy::sort_by_expr::LazySortBy; -pub use crate::dataframe::lazy::to_lazy::ToLazyFrame; -pub use explode::LazyExplode; -pub use flatten::LazyFlatten; - -pub fn add_lazy_decls(working_set: &mut StateWorkingSet) { - macro_rules! 
bind_command { - ( $command:expr ) => { - working_set.add_decl(Box::new($command)); - }; - ( $( $command:expr ),* ) => { - $( working_set.add_decl(Box::new($command)); )* - }; - } - - // Dataframe commands - bind_command!( - LazyAggregate, - LazyCache, - LazyCollect, - LazyFetch, - LazyFillNA, - LazyFillNull, - LazyFilter, - LazyJoin, - LazyQuantile, - LazyMedian, - LazyReverse, - LazySelect, - LazySortBy, - ToLazyFrame, - ToLazyGroupBy, - LazyExplode, - LazyFlatten - ); -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs deleted file mode 100644 index ac8ec590c6..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/quantile.rs +++ /dev/null @@ -1,87 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuLazyFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{lit, QuantileInterpolOptions}; - -#[derive(Clone)] -pub struct LazyQuantile; - -impl Command for LazyQuantile { - fn name(&self) -> &str { - "dfr quantile" - } - - fn usage(&self) -> &str { - "Aggregates the columns to the selected quantile." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "quantile", - SyntaxShape::Number, - "quantile value for quantile operation", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "quantile value from columns in a dataframe", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr quantile 0.5", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new("a".to_string(), vec![Value::test_float(4.0)]), - Column::new("b".to_string(), vec![Value::test_float(2.0)]), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - let quantile: f64 = call.req(engine_state, stack, 0)?; - - let lazy = NuLazyFrame::try_from_value(value)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.into_polars() - .quantile(lit(quantile), QuantileInterpolOptions::default()) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - help: None, - span: None, - inner: vec![], - })?, - ); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazyQuantile {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs deleted file mode 100644 index b4f01bdc07..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/select.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct LazySelect; - -impl Command for LazySelect { - fn name(&self) -> &str { - "dfr select" - } - - fn usage(&self) -> &str { - "Selects columns from lazyframe." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest( - "select expressions", - SyntaxShape::Any, - "Expression(s) that define the column selection", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Select a column from the dataframe", - example: "[[a b]; [6 2] [4 2] [2 2]] | dfr into-df | dfr select a", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "a".to_string(), - vec![Value::test_int(6), Value::test_int(4), Value::test_int(2)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let vals: Vec = call.rest(engine_state, stack, 0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(value)?; - - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let lazy = NuLazyFrame::new(lazy.from_eager, lazy.into_polars().select(&expressions)); - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazySelect {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs deleted file mode 100644 index 2e109338a9..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/sort_by_expr.rs +++ /dev/null @@ -1,159 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; -use polars::chunked_array::ops::SortMultipleOptions; - -#[derive(Clone)] -pub struct LazySortBy; - -impl Command for LazySortBy { - fn name(&self) -> &str { - "dfr sort-by" - } - - fn usage(&self) -> &str { - "Sorts a lazy dataframe based on expression(s)." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .rest( - "sort expression", - SyntaxShape::Any, - "sort expression for the dataframe", - ) - .named( - "reverse", - SyntaxShape::List(Box::new(SyntaxShape::Boolean)), - "Reverse sorting. 
Default is false", - Some('r'), - ) - .switch( - "nulls-last", - "nulls are shown last in the dataframe", - Some('n'), - ) - .switch("maintain-order", "Maintains order during sort", Some('m')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Sort dataframe by one column", - example: "[[a b]; [6 2] [1 4] [4 1]] | dfr into-df | dfr sort-by a", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![Value::test_int(1), Value::test_int(4), Value::test_int(6)], - ), - Column::new( - "b".to_string(), - vec![Value::test_int(4), Value::test_int(1), Value::test_int(2)], - ), - ], None) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Sort column using two columns", - example: - "[[a b]; [6 2] [1 1] [1 4] [2 4]] | dfr into-df | dfr sort-by [a b] -r [false true]", - result: Some( - NuDataFrame::try_from_columns(vec![ - Column::new( - "a".to_string(), - vec![ - Value::test_int(1), - Value::test_int(1), - Value::test_int(2), - Value::test_int(6), - ], - ), - Column::new( - "b".to_string(), - vec![ - Value::test_int(4), - Value::test_int(1), - Value::test_int(4), - Value::test_int(2), - ], - ), - ], None) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let vals: Vec = call.rest(engine_state, stack, 0)?; - let value = Value::list(vals, call.head); - let expressions = NuExpression::extract_exprs(value)?; - let nulls_last = call.has_flag(engine_state, stack, "nulls-last")?; - let maintain_order = call.has_flag(engine_state, stack, "maintain-order")?; - - let reverse: Option> = call.get_flag(engine_state, stack, "reverse")?; - let reverse = match reverse { - Some(list) => { - if expressions.len() != list.len() { - let span = call - .get_flag::(engine_state, stack, "reverse")? 
- .expect("already checked and it exists") - .span(); - return Err(ShellError::GenericError { - error: "Incorrect list size".into(), - msg: "Size doesn't match expression list".into(), - span: Some(span), - help: None, - inner: vec![], - }); - } else { - list - } - } - None => expressions.iter().map(|_| false).collect::>(), - }; - - let sort_options = SortMultipleOptions { - descending: reverse, - nulls_last, - multithreaded: true, - maintain_order, - }; - - let lazy = NuLazyFrame::try_from_pipeline(input, call.head)?; - let lazy = NuLazyFrame::new( - lazy.from_eager, - lazy.into_polars().sort_by_exprs(&expressions, sort_options), - ); - - Ok(PipelineData::Value( - NuLazyFrame::into_value(lazy, call.head)?, - None, - )) - } -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(LazySortBy {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs b/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs deleted file mode 100644 index 1c711cdd57..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/lazy/to_lazy.rs +++ /dev/null @@ -1,53 +0,0 @@ -use crate::dataframe::values::{NuDataFrame, NuLazyFrame, NuSchema}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct ToLazyFrame; - -impl Command for ToLazyFrame { - fn name(&self) -> &str { - "dfr into-lazy" - } - - fn usage(&self) -> &str { - "Converts a dataframe into a lazy dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "schema", - SyntaxShape::Record(vec![]), - r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#, - Some('s'), - ) - .input_output_type(Type::Any, Type::Custom("dataframe".into())) - .category(Category::Custom("lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Takes a dictionary and creates a lazy dataframe", - example: "[[a b];[1 2] [3 4]] | dfr into-lazy", - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let maybe_schema = call - .get_flag(engine_state, stack, "schema")? 
- .map(|schema| NuSchema::try_from(&schema)) - .transpose()?; - - let df = NuDataFrame::try_from_iter(input.into_iter(), maybe_schema)?; - let lazy = NuLazyFrame::from_dataframe(df); - let value = Value::custom(Box::new(lazy), call.head); - Ok(PipelineData::Value(value, None)) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/mod.rs deleted file mode 100644 index d99ce516be..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/mod.rs +++ /dev/null @@ -1,36 +0,0 @@ -mod eager; -mod expressions; -mod lazy; -mod series; -mod stub; -mod utils; -mod values; - -pub use eager::add_eager_decls; -pub use expressions::add_expressions; -pub use lazy::add_lazy_decls; -pub use series::add_series_decls; - -use nu_protocol::engine::{EngineState, StateWorkingSet}; - -pub fn add_dataframe_context(mut engine_state: EngineState) -> EngineState { - let delta = { - let mut working_set = StateWorkingSet::new(&engine_state); - working_set.add_decl(Box::new(stub::Dfr)); - add_series_decls(&mut working_set); - add_eager_decls(&mut working_set); - add_expressions(&mut working_set); - add_lazy_decls(&mut working_set); - - working_set.render() - }; - - if let Err(err) = engine_state.merge_delta(delta) { - eprintln!("Error creating dataframe command context: {err:?}"); - } - - engine_state -} - -#[cfg(test)] -mod test_dataframe; diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs b/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs deleted file mode 100644 index 66921e793c..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/all_false.rs +++ /dev/null @@ -1,108 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct AllFalse; - -impl Command for AllFalse { - fn name(&self) -> &str { - "dfr all-false" - } - - fn usage(&self) -> &str { - "Returns true if all values are false." 
-    }
-
-    fn signature(&self) -> Signature {
-        Signature::build(self.name())
-            .input_output_type(
-                Type::Custom("dataframe".into()),
-                Type::Custom("dataframe".into()),
-            )
-            .category(Category::Custom("dataframe".into()))
-    }
-
-    fn examples(&self) -> Vec<Example> {
-        vec![
-            Example {
-                description: "Returns true if all values are false",
-                example: "[false false false] | dfr into-df | dfr all-false",
-                result: Some(
-                    NuDataFrame::try_from_columns(
-                        vec![Column::new(
-                            "all_false".to_string(),
-                            vec![Value::test_bool(true)],
-                        )],
-                        None,
-                    )
-                    .expect("simple df for test should not fail")
-                    .into_value(Span::test_data()),
-                ),
-            },
-            Example {
-                description: "Checks the result from a comparison",
-                example: r#"let s = ([5 6 2 10] | dfr into-df);
-    let res = ($s > 9);
-    $res | dfr all-false"#,
-                result: Some(
-                    NuDataFrame::try_from_columns(
-                        vec![Column::new(
-                            "all_false".to_string(),
-                            vec![Value::test_bool(false)],
-                        )],
-                        None,
-                    )
-                    .expect("simple df for test should not fail")
-                    .into_value(Span::test_data()),
-                ),
-            },
-        ]
-    }
-
-    fn run(
-        &self,
-        engine_state: &EngineState,
-        stack: &mut Stack,
-        call: &Call,
-        input: PipelineData,
-    ) -> Result<PipelineData, ShellError> {
-        command(engine_state, stack, call, input)
-    }
-}
-
-fn command(
-    _engine_state: &EngineState,
-    _stack: &mut Stack,
-    call: &Call,
-    input: PipelineData,
-) -> Result<PipelineData, ShellError> {
-    let df = NuDataFrame::try_from_pipeline(input, call.head)?;
-
-    let series = df.as_series(call.head)?;
-    let bool = series.bool().map_err(|_| ShellError::GenericError {
-        error: "Error converting to bool".into(),
-        msg: "all-false only works with series of type bool".into(),
-        span: Some(call.head),
-        help: None,
-        inner: vec![],
-    })?;
-
-    let value = Value::bool(!bool.any(), call.head);
-
-    NuDataFrame::try_from_columns(
-        vec![Column::new("all_false".to_string(), vec![value])],
-        None,
-    )
-    .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None))
-}
-
-#[cfg(test)]
-mod test {
-    use super::super::super::test_dataframe::test_dataframe;
-    use super::*;
-
-    #[test]
-    fn test_examples() {
-        test_dataframe(vec![Box::new(AllFalse {})])
-    }
-}
diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs b/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs
deleted file mode 100644
index 16b4a9edd9..0000000000
--- a/crates/nu-cmd-dataframe/src/dataframe/series/all_true.rs
+++ /dev/null
@@ -1,105 +0,0 @@
-use crate::dataframe::values::{Column, NuDataFrame};
-use nu_engine::command_prelude::*;
-
-#[derive(Clone)]
-pub struct AllTrue;
-
-impl Command for AllTrue {
-    fn name(&self) -> &str {
-        "dfr all-true"
-    }
-
-    fn usage(&self) -> &str {
-        "Returns true if all values are true."
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Returns true if all values are true", - example: "[true true true] | dfr into-df | dfr all-true", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "all_true".to_string(), - vec![Value::test_bool(true)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Checks the result from a comparison", - example: r#"let s = ([5 6 2 8] | dfr into-df); - let res = ($s > 9); - $res | dfr all-true"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "all_true".to_string(), - vec![Value::test_bool(false)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let series = df.as_series(call.head)?; - let bool = series.bool().map_err(|_| ShellError::GenericError { - error: "Error converting to bool".into(), - msg: "all-false only works with series of type bool".into(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let value = Value::bool(bool.all(), call.head); - - NuDataFrame::try_from_columns(vec![Column::new("all_true".to_string(), vec![value])], None) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(AllTrue {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs b/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs deleted file mode 100644 index d7539401ab..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/arg_max.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked}; - -#[derive(Clone)] -pub struct ArgMax; - -impl Command for ArgMax { - fn name(&self) -> &str { - "dfr arg-max" - } - - fn usage(&self) -> &str { - "Return index for max value in series." 
- } - - fn search_terms(&self) -> Vec<&str> { - vec!["argmax", "maximum", "most", "largest", "greatest"] - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns index for max value", - example: "[1 3 2] | dfr into-df | dfr arg-max", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new("arg_max".to_string(), vec![Value::test_int(1)])], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let res = series.arg_max(); - let chunked = match res { - Some(index) => UInt32Chunked::from_slice("arg_max", &[index as u32]), - None => UInt32Chunked::from_slice("arg_max", &[]), - }; - - let res = chunked.into_series(); - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ArgMax {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs b/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs deleted file mode 100644 index 1b685d65b4..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/arg_min.rs +++ /dev/null @@ -1,85 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{ArgAgg, IntoSeries, NewChunkedArray, UInt32Chunked}; - -#[derive(Clone)] -pub struct ArgMin; - -impl Command for ArgMin { - fn name(&self) -> &str { - "dfr arg-min" - } - - fn usage(&self) -> &str { - "Return index for min value in series." 
- } - - fn search_terms(&self) -> Vec<&str> { - vec!["argmin", "minimum", "least", "smallest", "lowest"] - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns index for min value", - example: "[1 3 2] | dfr into-df | dfr arg-min", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new("arg_min".to_string(), vec![Value::test_int(0)])], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let res = series.arg_min(); - let chunked = match res { - Some(index) => UInt32Chunked::from_slice("arg_min", &[index as u32]), - None => UInt32Chunked::from_slice("arg_min", &[]), - }; - - let res = chunked.into_series(); - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ArgMin {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs b/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs deleted file mode 100644 index c32875e87b..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/cumulative.rs +++ /dev/null @@ -1,148 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{DataType, IntoSeries}; -use polars_ops::prelude::{cum_max, cum_min, cum_sum}; - -enum CumType { - Min, - Max, - Sum, -} - -impl CumType { - fn from_str(roll_type: &str, span: Span) -> Result { - match roll_type { - "min" => Ok(Self::Min), - "max" => Ok(Self::Max), - "sum" => Ok(Self::Sum), - _ => Err(ShellError::GenericError { - error: "Wrong operation".into(), - msg: "Operation not valid for cumulative".into(), - span: Some(span), - help: Some("Allowed values: max, min, sum".into()), - inner: vec![], - }), - } - } - - fn to_str(&self) -> &'static str { - match self { - CumType::Min => "cumulative_min", - CumType::Max => "cumulative_max", - CumType::Sum => "cumulative_sum", - } - } -} - -#[derive(Clone)] -pub struct Cumulative; - -impl Command for Cumulative { - fn name(&self) -> &str { - "dfr cumulative" - } - - fn usage(&self) -> &str { - "Cumulative calculation for a series." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("type", SyntaxShape::String, "rolling operation") - .switch("reverse", "Reverse cumulative calculation", Some('r')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Cumulative sum for a series", - example: "[1 2 3 4 5] | dfr into-df | dfr cumulative sum", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0_cumulative_sum".to_string(), - vec![ - Value::test_int(1), - Value::test_int(3), - Value::test_int(6), - Value::test_int(10), - Value::test_int(15), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let cum_type: Spanned = call.req(engine_state, stack, 0)?; - let reverse = call.has_flag(engine_state, stack, "reverse")?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - if let DataType::Object(..) = series.dtype() { - return Err(ShellError::GenericError { - error: "Found object series".into(), - msg: "Series of type object cannot be used for cumulative operation".into(), - span: Some(call.head), - help: None, - inner: vec![], - }); - } - - let cum_type = CumType::from_str(&cum_type.item, cum_type.span)?; - let mut res = match cum_type { - CumType::Max => cum_max(&series, reverse), - CumType::Min => cum_min(&series, reverse), - CumType::Sum => cum_sum(&series, reverse), - } - .map_err(|e| ShellError::GenericError { - error: "Error creating cumulative".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let name = format!("{}_{}", series.name(), cum_type.to_str()); - res.rename(&name); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Cumulative {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_date.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/as_date.rs deleted file mode 100644 index b406057572..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_date.rs +++ /dev/null @@ -1,94 +0,0 @@ -use crate::dataframe::values::NuDataFrame; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, StringMethods}; - -#[derive(Clone)] -pub struct AsDate; - -impl Command for AsDate { - fn name(&self) -> &str { - "dfr as-date" - } - - fn usage(&self) -> &str { - r#"Converts string to date."# - } - - fn extra_usage(&self) -> &str { - r#"Format example: - "%Y-%m-%d" => 2021-12-31 - "%d-%m-%Y" => 31-12-2021 - "%Y%m%d" => 2021319 (2021-03-19)"# - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("format", SyntaxShape::String, "formatting date string") - .switch("not-exact", "the format string may be contained in the date (e.g. 
foo-2021-01-01-bar could match 2021-01-01)", Some('n')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Converts string to date", - example: r#"["2021-12-30" "2021-12-31"] | dfr into-df | dfr as-datetime "%Y-%m-%d""#, - result: None, - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let format: String = call.req(engine_state, stack, 0)?; - let not_exact = call.has_flag(engine_state, stack, "not-exact")?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - let casted = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = if not_exact { - casted.as_date_not_exact(Some(format.as_str())) - } else { - casted.as_date(Some(format.as_str()), false) - }; - - let mut res = res - .map_err(|e| ShellError::GenericError { - error: "Error creating datetime".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into_series(); - - res.rename("date"); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs deleted file mode 100644 index 6ee979b069..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/as_datetime.rs +++ /dev/null @@ -1,187 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use chrono::DateTime; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, StringMethods, TimeUnit}; - -#[derive(Clone)] -pub struct AsDateTime; - -impl Command for AsDateTime { - fn name(&self) -> &str { - "dfr as-datetime" - } - - fn usage(&self) -> &str { - r#"Converts string to datetime."# - } - - fn extra_usage(&self) -> &str { - r#"Format example: - "%y/%m/%d %H:%M:%S" => 21/12/31 12:54:98 - "%y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01 - "%y/%m/%d %H:%M:%S" => 21/12/31 24:58:01 - "%y%m%d %H:%M:%S" => 210319 23:58:50 - "%Y/%m/%d %H:%M:%S" => 2021/12/31 12:54:98 - "%Y-%m-%d %H:%M:%S" => 2021-12-31 24:58:01 - "%Y/%m/%d %H:%M:%S" => 2021/12/31 24:58:01 - "%Y%m%d %H:%M:%S" => 20210319 23:58:50 - "%FT%H:%M:%S" => 2019-04-18T02:45:55 - "%FT%H:%M:%S.%6f" => microseconds - "%FT%H:%M:%S.%9f" => nanoseconds"# - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("format", SyntaxShape::String, "formatting date time string") - .switch("not-exact", "the format string may be contained in the date (e.g. 
foo-2021-01-01-bar could match 2021-01-01)", Some('n')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Converts string to datetime", - example: r#"["2021-12-30 00:00:00" "2021-12-31 00:00:00"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S""#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "datetime".to_string(), - vec![ - Value::date( - DateTime::parse_from_str( - "2021-12-30 00:00:00 +0000", - "%Y-%m-%d %H:%M:%S %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - Value::date( - DateTime::parse_from_str( - "2021-12-31 00:00:00 +0000", - "%Y-%m-%d %H:%M:%S %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Converts string to datetime with high resolutions", - example: r#"["2021-12-30 00:00:00.123456789" "2021-12-31 00:00:00.123456789"] | dfr into-df | dfr as-datetime "%Y-%m-%d %H:%M:%S.%9f""#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "datetime".to_string(), - vec![ - Value::date( - DateTime::parse_from_str( - "2021-12-30 00:00:00.123456789 +0000", - "%Y-%m-%d %H:%M:%S.%9f %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - Value::date( - DateTime::parse_from_str( - "2021-12-31 00:00:00.123456789 +0000", - "%Y-%m-%d %H:%M:%S.%9f %z", - ) - .expect("date calculation should not fail in test"), - Span::test_data(), - ), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let format: String = call.req(engine_state, stack, 0)?; - let not_exact = call.has_flag(engine_state, stack, "not-exact")?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - let casted = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = if not_exact { - casted.as_datetime_not_exact( - Some(format.as_str()), - TimeUnit::Nanoseconds, - false, - None, - &Default::default(), - ) - } else { - casted.as_datetime( - Some(format.as_str()), - TimeUnit::Nanoseconds, - false, - false, - None, - &Default::default(), - ) - }; - - let mut res = res - .map_err(|e| ShellError::GenericError { - error: "Error creating datetime".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? 
- .into_series(); - - res.rename("datetime"); - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(AsDateTime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs deleted file mode 100644 index 9187219d7a..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_day.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetDay; - -impl Command for GetDay { - fn name(&self) -> &str { - "dfr get-day" - } - - fn usage(&self) -> &str { - "Gets day from date." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns day from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-day"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(4), Value::test_int(4)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.day().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetDay {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs deleted file mode 100644 index ba05843047..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_hour.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetHour; - -impl Command for GetHour { - fn name(&self) -> &str { - "dfr get-hour" - } - - fn usage(&self) -> &str { - "Gets hour from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns hour from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-hour"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(16), Value::test_int(16)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.hour().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetHour {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs deleted file mode 100644 index 902ed61d56..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_minute.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetMinute; - -impl Command for GetMinute { - fn name(&self) -> &str { - "dfr get-minute" - } - - fn usage(&self) -> &str { - "Gets minute from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns minute from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-minute"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(39), Value::test_int(39)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.minute().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetMinute {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs deleted file mode 100644 index 077d5afc1e..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_month.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetMonth; - -impl Command for GetMonth { - fn name(&self) -> &str { - "dfr get-month" - } - - fn usage(&self) -> &str { - "Gets month from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns month from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-month"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(8), Value::test_int(8)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.month().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetMonth {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs deleted file mode 100644 index 1543e31082..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_nanosecond.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetNanosecond; - -impl Command for GetNanosecond { - fn name(&self) -> &str { - "dfr get-nanosecond" - } - - fn usage(&self) -> &str { - "Gets nanosecond from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns nanosecond from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-nanosecond"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(0), Value::test_int(0)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.nanosecond().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetNanosecond {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs deleted file mode 100644 index b77ebbc14c..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_ordinal.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetOrdinal; - -impl Command for GetOrdinal { - fn name(&self) -> &str { - "dfr get-ordinal" - } - - fn usage(&self) -> &str { - "Gets ordinal from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns ordinal from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-ordinal"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(217), Value::test_int(217)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.ordinal().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetOrdinal {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs deleted file mode 100644 index e039bcc010..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_second.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetSecond; - -impl Command for GetSecond { - fn name(&self) -> &str { - "dfr get-second" - } - - fn usage(&self) -> &str { - "Gets second from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns second from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-second"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(18), Value::test_int(18)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.second().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetSecond {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs deleted file mode 100644 index 1a1bc2c12d..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_week.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetWeek; - -impl Command for GetWeek { - fn name(&self) -> &str { - "dfr get-week" - } - - fn usage(&self) -> &str { - "Gets week from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns week from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-week"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(32), Value::test_int(32)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.week().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetWeek {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs deleted file mode 100644 index b5cf1b3197..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_weekday.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetWeekDay; - -impl Command for GetWeekDay { - fn name(&self) -> &str { - "dfr get-weekday" - } - - fn usage(&self) -> &str { - "Gets weekday from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns weekday from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-weekday"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(2), Value::test_int(2)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.weekday().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetWeekDay {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs deleted file mode 100644 index 1ec3515949..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/get_year.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{DatetimeMethods, IntoSeries}; - -#[derive(Clone)] -pub struct GetYear; - -impl Command for GetYear { - fn name(&self) -> &str { - "dfr get-year" - } - - fn usage(&self) -> &str { - "Gets year from date." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns year from a date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr get-year"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(2020), Value::test_int(2020)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to datetime type".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = casted.year().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(GetYear {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/date/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/series/date/mod.rs deleted file mode 100644 index ed3895a172..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/date/mod.rs +++ /dev/null @@ -1,25 +0,0 @@ -mod as_date; -mod as_datetime; -mod get_day; -mod get_hour; -mod get_minute; -mod get_month; -mod get_nanosecond; -mod get_ordinal; -mod get_second; -mod get_week; -mod get_weekday; -mod get_year; - -pub use as_date::AsDate; -pub use as_datetime::AsDateTime; -pub use get_day::GetDay; -pub use get_hour::GetHour; -pub use get_minute::GetMinute; -pub use get_month::GetMonth; -pub use get_nanosecond::GetNanosecond; -pub use get_ordinal::GetOrdinal; -pub use get_second::GetSecond; -pub use get_week::GetWeek; -pub use get_weekday::GetWeekDay; -pub use get_year::GetYear; diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs deleted file mode 100644 index bf28cbac58..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_sort.rs +++ /dev/null @@ -1,130 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, SortOptions}; - -#[derive(Clone)] -pub struct ArgSort; - -impl Command for ArgSort { - fn name(&self) -> &str { - "dfr arg-sort" - } - - fn usage(&self) -> &str { - "Returns indexes for a sorted series." 
- } - - fn search_terms(&self) -> Vec<&str> { - vec!["argsort", "order", "arrange"] - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .switch("reverse", "reverse order", Some('r')) - .switch("nulls-last", "nulls ordered last", Some('n')) - .switch( - "maintain-order", - "maintain order on sorted items", - Some('m'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Returns indexes for a sorted series", - example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "arg_sort".to_string(), - vec![ - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(3), - Value::test_int(4), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Returns indexes for a sorted series", - example: "[1 2 2 3 3] | dfr into-df | dfr arg-sort --reverse", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "arg_sort".to_string(), - vec![ - Value::test_int(3), - Value::test_int(4), - Value::test_int(1), - Value::test_int(2), - Value::test_int(0), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let sort_options = SortOptions { - descending: call.has_flag(engine_state, stack, "reverse")?, - nulls_last: call.has_flag(engine_state, stack, "nulls-last")?, - multithreaded: true, - maintain_order: call.has_flag(engine_state, stack, "maintain-order")?, - }; - - let mut res = df - .as_series(call.head)? - .arg_sort(sort_options) - .into_series(); - res.rename("arg_sort"); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ArgSort {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs deleted file mode 100644 index 106e95f5ea..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_true.rs +++ /dev/null @@ -1,115 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{arg_where, col, IntoLazy}; - -#[derive(Clone)] -pub struct ArgTrue; - -impl Command for ArgTrue { - fn name(&self) -> &str { - "dfr arg-true" - } - - fn usage(&self) -> &str { - "Returns indexes where values are true." 
- } - - fn search_terms(&self) -> Vec<&str> { - vec!["argtrue", "truth", "boolean-true"] - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns indexes where values are true", - example: "[false true false] | dfr into-df | dfr arg-true", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "arg_true".to_string(), - vec![Value::test_int(1)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let columns = df.as_ref().get_column_names(); - if columns.len() > 1 { - return Err(ShellError::GenericError { - error: "Error using as series".into(), - msg: "dataframe has more than one column".into(), - span: Some(call.head), - help: None, - inner: vec![], - }); - } - - match columns.first() { - Some(column) => { - let expression = arg_where(col(column).eq(true)).alias("arg_true"); - let res = df - .as_ref() - .clone() - .lazy() - .select(&[expression]) - .collect() - .map_err(|err| ShellError::GenericError { - error: "Error creating index column".into(), - msg: err.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let value = NuDataFrame::dataframe_into_value(res, call.head); - Ok(PipelineData::Value(value, None)) - } - _ => Err(ShellError::UnsupportedInput { - msg: "Expected the dataframe to have a column".to_string(), - input: "".to_string(), - msg_span: call.head, - input_span: call.head, - }), - } -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ArgTrue {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs deleted file mode 100644 index 6b69518cba..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/arg_unique.rs +++ /dev/null @@ -1,93 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::IntoSeries; - -#[derive(Clone)] -pub struct ArgUnique; - -impl Command for ArgUnique { - fn name(&self) -> &str { - "dfr arg-unique" - } - - fn usage(&self) -> &str { - "Returns indexes for unique values." 
- } - - fn search_terms(&self) -> Vec<&str> { - vec!["argunique", "distinct", "noduplicate", "unrepeated"] - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns indexes for unique values", - example: "[1 2 2 3 3] | dfr into-df | dfr arg-unique", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "arg_unique".to_string(), - vec![Value::test_int(0), Value::test_int(1), Value::test_int(3)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut res = df - .as_series(call.head)? - .arg_unique() - .map_err(|e| ShellError::GenericError { - error: "Error extracting unique values".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into_series(); - res.rename("arg_unique"); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ArgUnique {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/mod.rs deleted file mode 100644 index c0af8c8653..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod arg_sort; -mod arg_true; -mod arg_unique; -mod set_with_idx; - -pub use arg_sort::ArgSort; -pub use arg_true::ArgTrue; -pub use arg_unique::ArgUnique; -pub use set_with_idx::SetWithIndex; diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs b/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs deleted file mode 100644 index 307ef4d5c3..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/indexes/set_with_idx.rs +++ /dev/null @@ -1,213 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{ChunkSet, DataType, IntoSeries}; - -#[derive(Clone)] -pub struct SetWithIndex; - -impl Command for SetWithIndex { - fn name(&self) -> &str { - "dfr set-with-idx" - } - - fn usage(&self) -> &str { - "Sets value in the given index." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("value", SyntaxShape::Any, "value to be inserted in series") - .required_named( - "indices", - SyntaxShape::Any, - "list of indices indicating where to set the value", - Some('i'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Set value in selected rows from series", - example: r#"let series = ([4 1 5 2 4 3] | dfr into-df); - let indices = ([0 2] | dfr into-df); - $series | dfr set-with-idx 6 --indices $indices"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(6), - Value::test_int(1), - Value::test_int(6), - Value::test_int(2), - Value::test_int(4), - Value::test_int(3), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let value: Value = call.req(engine_state, stack, 0)?; - - let indices_value: Value = call - .get_flag(engine_state, stack, "indices")? - .expect("required named value"); - let indices_span = indices_value.span(); - let indices = NuDataFrame::try_from_value(indices_value)?.as_series(indices_span)?; - - let casted = match indices.dtype() { - DataType::UInt32 | DataType::UInt64 | DataType::Int32 | DataType::Int64 => indices - .as_ref() - .cast(&DataType::UInt32) - .map_err(|e| ShellError::GenericError { - error: "Error casting indices".into(), - msg: e.to_string(), - span: Some(indices_span), - help: None, - inner: vec![], - }), - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: "Series with incorrect type".into(), - span: Some(indices_span), - help: Some("Consider using a Series with type int type".into()), - inner: vec![], - }), - }?; - - let indices = casted - .u32() - .map_err(|e| ShellError::GenericError { - error: "Error casting indices".into(), - msg: e.to_string(), - span: Some(indices_span), - help: None, - inner: vec![], - })? - .into_iter() - .flatten(); - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let span = value.span(); - let res = match value { - Value::Int { val, .. } => { - let chunked = series.i64().map_err(|e| ShellError::GenericError { - error: "Error casting to i64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let res = chunked.scatter_single(indices, Some(val)).map_err(|e| { - ShellError::GenericError { - error: "Error setting value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - } - })?; - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - } - Value::Float { val, .. 
} => { - let chunked = series.f64().map_err(|e| ShellError::GenericError { - error: "Error casting to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let res = chunked.scatter_single(indices, Some(val)).map_err(|e| { - ShellError::GenericError { - error: "Error setting value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - } - })?; - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - } - Value::String { val, .. } => { - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let res = chunked - .scatter_single(indices, Some(val.as_ref())) - .map_err(|e| ShellError::GenericError { - error: "Error setting value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let mut res = res.into_series(); - res.rename("string"); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - } - _ => Err(ShellError::GenericError { - error: "Incorrect value type".into(), - msg: format!( - "this value cannot be set in a series of type '{}'", - series.dtype() - ), - span: Some(span), - help: None, - inner: vec![], - }), - }; - - res.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(SetWithIndex {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs deleted file mode 100644 index b28f977b47..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_duplicated.rs +++ /dev/null @@ -1,122 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::IntoSeries; - -#[derive(Clone)] -pub struct IsDuplicated; - -impl Command for IsDuplicated { - fn name(&self) -> &str { - "dfr is-duplicated" - } - - fn usage(&self) -> &str { - "Creates mask indicating duplicated values." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create mask indicating duplicated values", - example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-duplicated", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_duplicated".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Create mask indicating duplicated rows in a dataframe", - example: - "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-duplicated", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_duplicated".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut res = df - .as_ref() - .is_duplicated() - .map_err(|e| ShellError::GenericError { - error: "Error finding duplicates".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into_series(); - - res.rename("is_duplicated"); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(IsDuplicated {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs deleted file mode 100644 index 0792d3fddf..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_in.rs +++ /dev/null @@ -1,104 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{is_in, IntoSeries}; - -#[derive(Clone)] -pub struct IsIn; - -impl Command for IsIn { - fn name(&self) -> &str { - "dfr is-in" - } - - fn usage(&self) -> &str { - "Checks if elements from a series are contained in right series." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("other", SyntaxShape::Any, "right series") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Checks if elements from a series are contained in right series", - example: r#"let other = ([1 3 6] | dfr into-df); - [5 6 6 6 8 8 8] | dfr into-df | dfr is-in $other"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_in".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?.as_series(call.head)?; - - let other_value: Value = call.req(engine_state, stack, 0)?; - let other_span = other_value.span(); - let other_df = NuDataFrame::try_from_value(other_value)?; - let other = other_df.as_series(other_span)?; - - let mut res = is_in(&df, &other) - .map_err(|e| ShellError::GenericError { - error: "Error finding in other".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into_series(); - - res.rename("is_in"); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(IsIn {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs deleted file mode 100644 index 4ed33ce951..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_not_null.rs +++ /dev/null @@ -1,122 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; -use polars::prelude::IntoSeries; - -#[derive(Clone)] -pub struct IsNotNull; - -impl Command for IsNotNull { - fn name(&self) -> &str { - "dfr is-not-null" - } - - fn usage(&self) -> &str { - "Creates mask where value is not null." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create mask where values are not null", - example: r#"let s = ([5 6 0 8] | dfr into-df); - let res = ($s / $s); - $res | dfr is-not-null"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_not_null".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(true), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates a is not null expression from a column", - example: "dfr col a | dfr is-not-null", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - command(engine_state, stack, call, df) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().is_not_null().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let mut res = df.as_series(call.head)?.is_not_null(); - res.rename("is_not_null"); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - use crate::dataframe::test_dataframe::{build_test_engine_state, test_dataframe_example}; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(IsNotNull {})]); - test_dataframe_example(&mut engine_state, &IsNotNull.examples()[0]); - } - - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - Box::new(IsNotNull {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &IsNotNull.examples()[1]); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs deleted file mode 100644 index b99d48af66..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_null.rs +++ /dev/null @@ -1,122 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; -use polars::prelude::IntoSeries; - -#[derive(Clone)] -pub struct IsNull; - -impl Command for IsNull { - fn name(&self) -> &str { - "dfr is-null" - } - - fn usage(&self) -> &str { - "Creates mask where value is null." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create mask where values are null", - example: r#"let s = ([5 6 0 8] | dfr into-df); - let res = ($s / $s); - $res | dfr is-null"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_null".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates a is null expression from a column", - example: "dfr col a | dfr is-null", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - command(engine_state, stack, call, df) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().is_null().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let mut res = df.as_series(call.head)?.is_null(); - res.rename("is_null"); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - use crate::dataframe::test_dataframe::{build_test_engine_state, test_dataframe_example}; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(IsNull {})]); - test_dataframe_example(&mut engine_state, &IsNull.examples()[0]); - } - - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - Box::new(IsNull {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &IsNull.examples()[1]); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs deleted file mode 100644 index 8e313abca7..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/is_unique.rs +++ /dev/null @@ -1,121 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::IntoSeries; - -#[derive(Clone)] -pub struct IsUnique; - -impl Command for IsUnique { - fn name(&self) -> &str { - "dfr is-unique" - } - - fn usage(&self) -> &str { - "Creates mask indicating unique values." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Create mask indicating unique values", - example: "[5 6 6 6 8 8 8] | dfr into-df | dfr is-unique", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_unique".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Create mask indicating duplicated rows in a dataframe", - example: "[[a, b]; [1 2] [1 2] [3 3] [3 3] [1 1]] | dfr into-df | dfr is-unique", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "is_unique".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(false), - Value::test_bool(true), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let mut res = df - .as_ref() - .is_unique() - .map_err(|e| ShellError::GenericError { - error: "Error finding unique values".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into_series(); - - res.rename("is_unique"); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(IsUnique {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/mod.rs deleted file mode 100644 index 80c98b5ef0..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -mod is_duplicated; -mod is_in; -mod is_not_null; -mod is_null; -mod is_unique; -mod not; -mod set; - -pub use is_duplicated::IsDuplicated; -pub use is_in::IsIn; -pub use is_not_null::IsNotNull; -pub use is_null::IsNull; -pub use is_unique::IsUnique; -pub use not::NotSeries; -pub use set::SetSeries; diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs deleted file mode 100644 index 081a3c3b23..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/not.rs +++ /dev/null @@ -1,93 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::IntoSeries; - -use std::ops::Not; - -#[derive(Clone)] -pub struct NotSeries; - -impl Command for NotSeries { - fn name(&self) -> &str { - "dfr not" - } - - fn usage(&self) -> &str { - "Inverts boolean mask." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Inverts boolean mask", - example: "[true false true] | dfr into-df | dfr not", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(false), - Value::test_bool(true), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - command(engine_state, stack, call, df) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let series = df.as_series(call.head)?; - - let bool = series.bool().map_err(|e| ShellError::GenericError { - error: "Error inverting mask".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = bool.not(); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(NotSeries {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs b/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs deleted file mode 100644 index 4dacb7117b..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/masks/set.rs +++ /dev/null @@ -1,201 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{ChunkSet, DataType, IntoSeries}; - -#[derive(Clone)] -pub struct SetSeries; - -impl Command for SetSeries { - fn name(&self) -> &str { - "dfr set" - } - - fn usage(&self) -> &str { - "Sets value where given mask is true." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("value", SyntaxShape::Any, "value to be inserted in series") - .required_named( - "mask", - SyntaxShape::Any, - "mask indicating insertions", - Some('m'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Shifts the values by a given period", - example: r#"let s = ([1 2 2 3 3] | dfr into-df | dfr shift 2); - let mask = ($s | dfr is-null); - $s | dfr set 0 --mask $mask"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_int(0), - Value::test_int(0), - Value::test_int(1), - Value::test_int(2), - Value::test_int(2), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let value: Value = call.req(engine_state, stack, 0)?; - - let mask_value: Value = call - .get_flag(engine_state, stack, "mask")? - .expect("required named value"); - let mask_span = mask_value.span(); - let mask = NuDataFrame::try_from_value(mask_value)?.as_series(mask_span)?; - - let bool_mask = match mask.dtype() { - DataType::Boolean => mask.bool().map_err(|e| ShellError::GenericError { - error: "Error casting to bool".into(), - msg: e.to_string(), - span: Some(mask_span), - help: None, - inner: vec![], - }), - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: "can only use bool series as mask".into(), - span: Some(mask_span), - help: None, - inner: vec![], - }), - }?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - let span = value.span(); - let res = match value { - Value::Int { val, .. } => { - let chunked = series.i64().map_err(|e| ShellError::GenericError { - error: "Error casting to i64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let res = chunked - .set(bool_mask, Some(val)) - .map_err(|e| ShellError::GenericError { - error: "Error setting value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - } - Value::Float { val, .. } => { - let chunked = series.f64().map_err(|e| ShellError::GenericError { - error: "Error casting to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let res = chunked - .set(bool_mask, Some(val)) - .map_err(|e| ShellError::GenericError { - error: "Error setting value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - } - Value::String { val, .. 
} => { - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - let res = chunked.set(bool_mask, Some(val.as_ref())).map_err(|e| { - ShellError::GenericError { - error: "Error setting value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - } - })?; - - let mut res = res.into_series(); - res.rename("string"); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - } - _ => Err(ShellError::GenericError { - error: "Incorrect value type".into(), - msg: format!( - "this value cannot be set in a series of type '{}'", - series.dtype() - ), - span: Some(span), - help: None, - inner: vec![], - }), - }; - - res.map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::super::super::{IsNull, Shift}; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![ - Box::new(SetSeries {}), - Box::new(IsNull {}), - Box::new(Shift {}), - ]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/series/mod.rs deleted file mode 100644 index e1b9bc1087..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/mod.rs +++ /dev/null @@ -1,95 +0,0 @@ -mod date; -pub use date::*; - -mod string; -pub use string::*; - -mod masks; -pub use masks::*; - -mod indexes; -pub use indexes::*; - -mod all_false; -mod all_true; -mod arg_max; -mod arg_min; -mod cumulative; -mod n_null; -mod n_unique; -mod rolling; -mod shift; -mod unique; -mod value_counts; - -use nu_protocol::engine::StateWorkingSet; - -pub use all_false::AllFalse; -pub use all_true::AllTrue; -pub use arg_max::ArgMax; -pub use arg_min::ArgMin; -pub use cumulative::Cumulative; -pub use n_null::NNull; -pub use n_unique::NUnique; -pub use rolling::Rolling; -pub use shift::Shift; -pub use unique::Unique; -pub use value_counts::ValueCount; - -pub fn add_series_decls(working_set: &mut StateWorkingSet) { - macro_rules! bind_command { - ( $command:expr ) => { - working_set.add_decl(Box::new($command)); - }; - ( $( $command:expr ),* ) => { - $( working_set.add_decl(Box::new($command)); )* - }; - } - - // Series commands - bind_command!( - AllFalse, - AllTrue, - ArgMax, - ArgMin, - ArgSort, - ArgTrue, - ArgUnique, - AsDate, - AsDateTime, - Concatenate, - Contains, - Cumulative, - GetDay, - GetHour, - GetMinute, - GetMonth, - GetNanosecond, - GetOrdinal, - GetSecond, - GetWeek, - GetWeekDay, - GetYear, - IsDuplicated, - IsIn, - IsNotNull, - IsNull, - IsUnique, - NNull, - NUnique, - NotSeries, - Replace, - ReplaceAll, - Rolling, - SetSeries, - SetWithIndex, - Shift, - StrLengths, - StrSlice, - StrFTime, - ToLowerCase, - ToUpperCase, - Unique, - ValueCount - ); -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs b/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs deleted file mode 100644 index 6c9909da07..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/n_null.rs +++ /dev/null @@ -1,82 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct NNull; - -impl Command for NNull { - fn name(&self) -> &str { - "dfr count-null" - } - - fn usage(&self) -> &str { - "Counts null values." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Counts null values", - example: r#"let s = ([1 1 0 0 3 3 4] | dfr into-df); - ($s / $s) | dfr count-null"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "count_null".to_string(), - vec![Value::test_int(2)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let res = df.as_series(call.head)?.null_count(); - let value = Value::int(res as i64, call.head); - - NuDataFrame::try_from_columns( - vec![Column::new("count_null".to_string(), vec![value])], - None, - ) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(NNull {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs deleted file mode 100644 index c6d6e829f8..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/n_unique.rs +++ /dev/null @@ -1,127 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression}; -use nu_engine::command_prelude::*; - -#[derive(Clone)] -pub struct NUnique; - -impl Command for NUnique { - fn name(&self) -> &str { - "dfr n-unique" - } - - fn usage(&self) -> &str { - "Counts unique values." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_types(vec![ - ( - Type::Custom("expression".into()), - Type::Custom("expression".into()), - ), - ( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ), - ]) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Counts unique values", - example: "[1 1 2 2 3 3 4] | dfr into-df | dfr n-unique", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "count_unique".to_string(), - vec![Value::test_int(4)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates a is n-unique expression from a column", - example: "dfr col a | dfr n-unique", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - command(engine_state, stack, call, df) - } else { - let expr = NuExpression::try_from_value(value)?; - let expr: NuExpression = expr.into_polars().n_unique().into(); - - Ok(PipelineData::Value( - NuExpression::into_value(expr, call.head), - None, - )) - } - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let res = df - .as_series(call.head)? - .n_unique() - .map_err(|e| ShellError::GenericError { - error: "Error counting unique values".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let value = Value::int(res as i64, call.head); - - NuDataFrame::try_from_columns( - vec![Column::new("count_unique".to_string(), vec![value])], - None, - ) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::{build_test_engine_state, test_dataframe_example}; - use super::*; - use crate::dataframe::lazy::aggregate::LazyAggregate; - use crate::dataframe::lazy::groupby::ToLazyGroupBy; - - #[test] - fn test_examples_dataframe() { - let mut engine_state = build_test_engine_state(vec![Box::new(NUnique {})]); - test_dataframe_example(&mut engine_state, &NUnique.examples()[0]); - } - - #[test] - fn test_examples_expression() { - let mut engine_state = build_test_engine_state(vec![ - Box::new(NUnique {}), - Box::new(LazyAggregate {}), - Box::new(ToLazyGroupBy {}), - ]); - test_dataframe_example(&mut engine_state, &NUnique.examples()[1]); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs b/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs deleted file mode 100644 index b659462298..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/rolling.rs +++ /dev/null @@ -1,186 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{DataType, Duration, IntoSeries, RollingOptionsImpl, SeriesOpsTime}; - -enum RollType { - Min, - Max, - Sum, - Mean, -} - -impl RollType { - fn from_str(roll_type: &str, span: Span) -> Result { - match roll_type { - "min" => Ok(Self::Min), - "max" => Ok(Self::Max), - "sum" => Ok(Self::Sum), - "mean" => Ok(Self::Mean), - _ => Err(ShellError::GenericError { - error: "Wrong operation".into(), - msg: "Operation not valid for cumulative".into(), - span: Some(span), - help: 
Some("Allowed values: min, max, sum, mean".into()), - inner: vec![], - }), - } - } - - fn to_str(&self) -> &'static str { - match self { - RollType::Min => "rolling_min", - RollType::Max => "rolling_max", - RollType::Sum => "rolling_sum", - RollType::Mean => "rolling_mean", - } - } -} - -#[derive(Clone)] -pub struct Rolling; - -impl Command for Rolling { - fn name(&self) -> &str { - "dfr rolling" - } - - fn usage(&self) -> &str { - "Rolling calculation for a series." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("type", SyntaxShape::String, "rolling operation") - .required("window", SyntaxShape::Int, "Window size for rolling") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Rolling sum for a series", - example: "[1 2 3 4 5] | dfr into-df | dfr rolling sum 2 | dfr drop-nulls", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0_rolling_sum".to_string(), - vec![ - Value::test_int(3), - Value::test_int(5), - Value::test_int(7), - Value::test_int(9), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Rolling max for a series", - example: "[1 2 3 4 5] | dfr into-df | dfr rolling max 2 | dfr drop-nulls", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0_rolling_max".to_string(), - vec![ - Value::test_int(2), - Value::test_int(3), - Value::test_int(4), - Value::test_int(5), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let roll_type: Spanned = call.req(engine_state, stack, 0)?; - let window_size: i64 = call.req(engine_state, stack, 1)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - if let DataType::Object(..) 
= series.dtype() { - return Err(ShellError::GenericError { - error: "Found object series".into(), - msg: "Series of type object cannot be used for rolling operation".into(), - span: Some(call.head), - help: None, - inner: vec![], - }); - } - - let roll_type = RollType::from_str(&roll_type.item, roll_type.span)?; - - let rolling_opts = RollingOptionsImpl { - window_size: Duration::new(window_size), - min_periods: window_size as usize, - weights: None, - center: false, - by: None, - closed_window: None, - tu: None, - tz: None, - fn_params: None, - }; - let res = match roll_type { - RollType::Max => series.rolling_max(rolling_opts), - RollType::Min => series.rolling_min(rolling_opts), - RollType::Sum => series.rolling_sum(rolling_opts), - RollType::Mean => series.rolling_mean(rolling_opts), - }; - - let mut res = res.map_err(|e| ShellError::GenericError { - error: "Error calculating rolling values".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let name = format!("{}_{}", series.name(), roll_type.to_str()); - res.rename(&name); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::eager::DropNulls; - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Rolling {}), Box::new(DropNulls {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs b/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs deleted file mode 100644 index 2f40cf0a45..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/shift.rs +++ /dev/null @@ -1,115 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame, NuExpression, NuLazyFrame}; -use nu_engine::command_prelude::*; - -use polars_plan::prelude::lit; - -#[derive(Clone)] -pub struct Shift; - -impl Command for Shift { - fn name(&self) -> &str { - "dfr shift" - } - - fn usage(&self) -> &str { - "Shifts the values by a given period." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("period", SyntaxShape::Int, "shift period") - .named( - "fill", - SyntaxShape::Any, - "Expression used to fill the null values (lazy df)", - Some('f'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Shifts the values by a given period", - example: "[1 2 2 3 3] | dfr into-df | dfr shift 2 | dfr drop-nulls", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(2)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuLazyFrame::can_downcast(&value) { - let df = NuLazyFrame::try_from_value(value)?; - command_lazy(engine_state, stack, call, df) - } else { - let df = NuDataFrame::try_from_value(value)?; - command_eager(engine_state, stack, call, df) - } - } -} - -fn command_eager( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let period: i64 = call.req(engine_state, stack, 0)?; - let series = df.as_series(call.head)?.shift(period); - - NuDataFrame::try_from_series(vec![series], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -fn command_lazy( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - lazy: NuLazyFrame, -) -> Result { - let shift: i64 = call.req(engine_state, stack, 0)?; - let fill: Option = call.get_flag(engine_state, stack, "fill")?; - - let lazy = lazy.into_polars(); - - let lazy: NuLazyFrame = match fill { - Some(fill) => { - let expr = NuExpression::try_from_value(fill)?.into_polars(); - lazy.shift_and_fill(lit(shift), expr).into() - } - None => lazy.shift(shift).into(), - }; - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) -} - -#[cfg(test)] -mod test { - use super::super::super::eager::DropNulls; - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Shift {}), Box::new(DropNulls {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs deleted file mode 100644 index d7589bd3b1..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/concatenate.rs +++ /dev/null @@ -1,113 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct Concatenate; - -impl Command for Concatenate { - fn name(&self) -> &str { - "dfr concatenate" - } - - fn usage(&self) -> &str { - "Concatenates strings with other array." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "other", - SyntaxShape::Any, - "Other array with string to be concatenated", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Concatenate string", - example: r#"let other = ([za xs cd] | dfr into-df); - [abc abc abc] | dfr into-df | dfr concatenate $other"#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("abcza"), - Value::test_string("abcxs"), - Value::test_string("abccd"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - - let other: Value = call.req(engine_state, stack, 0)?; - let other_span = other.span(); - let other_df = NuDataFrame::try_from_value(other)?; - - let other_series = other_df.as_series(other_span)?; - let other_chunked = other_series.str().map_err(|e| ShellError::GenericError { - error: "The concatenate only with string columns".into(), - msg: e.to_string(), - span: Some(other_span), - help: None, - inner: vec![], - })?; - - let series = df.as_series(call.head)?; - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "The concatenate only with string columns".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let mut res = chunked.concat(other_chunked); - - res.rename(series.name()); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Concatenate {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs deleted file mode 100644 index 9c1d92681e..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/contains.rs +++ /dev/null @@ -1,106 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct Contains; - -impl Command for Contains { - fn name(&self) -> &str { - "dfr contains" - } - - fn usage(&self) -> &str { - "Checks if a pattern is contained in a string." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required( - "pattern", - SyntaxShape::String, - "Regex pattern to be searched", - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns boolean indicating if pattern was found", - example: "[abc acb acb] | dfr into-df | dfr contains ab", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_bool(true), - Value::test_bool(false), - Value::test_bool(false), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let pattern: String = call.req(engine_state, stack, 0)?; - - let series = df.as_series(call.head)?; - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "The contains command only with string columns".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let res = chunked - .contains(&pattern, false) - .map_err(|e| ShellError::GenericError { - error: "Error searching in series".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Contains {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/mod.rs deleted file mode 100644 index f2fa19cbaf..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/mod.rs +++ /dev/null @@ -1,19 +0,0 @@ -mod concatenate; -mod contains; -mod replace; -mod replace_all; -mod str_lengths; -mod str_slice; -mod strftime; -mod to_lowercase; -mod to_uppercase; - -pub use concatenate::Concatenate; -pub use contains::Contains; -pub use replace::Replace; -pub use replace_all::ReplaceAll; -pub use str_lengths::StrLengths; -pub use str_slice::StrSlice; -pub use strftime::StrFTime; -pub use to_lowercase::ToLowerCase; -pub use to_uppercase::ToUpperCase; diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs deleted file mode 100644 index d954e20b66..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace.rs +++ /dev/null @@ -1,120 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct Replace; - -impl Command for Replace { - fn name(&self) -> &str { - "dfr replace" - } - - fn usage(&self) -> &str { - "Replace the leftmost (sub)string by a regex pattern." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required_named( - "pattern", - SyntaxShape::String, - "Regex pattern to be matched", - Some('p'), - ) - .required_named( - "replace", - SyntaxShape::String, - "replacing string", - Some('r'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Replaces string", - example: "[abc abc abc] | dfr into-df | dfr replace --pattern ab --replace AB", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("ABc"), - Value::test_string("ABc"), - Value::test_string("ABc"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let pattern: String = call - .get_flag(engine_state, stack, "pattern")? - .expect("required value"); - let replace: String = call - .get_flag(engine_state, stack, "replace")? - .expect("required value"); - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "Error conversion to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let mut res = chunked - .replace(&pattern, &replace) - .map_err(|e| ShellError::GenericError { - error: "Error finding pattern other".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - res.rename(series.name()); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Replace {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs deleted file mode 100644 index f329cbca73..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/replace_all.rs +++ /dev/null @@ -1,121 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct ReplaceAll; - -impl Command for ReplaceAll { - fn name(&self) -> &str { - "dfr replace-all" - } - - fn usage(&self) -> &str { - "Replace all (sub)strings by a regex pattern." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required_named( - "pattern", - SyntaxShape::String, - "Regex pattern to be matched", - Some('p'), - ) - .required_named( - "replace", - SyntaxShape::String, - "replacing string", - Some('r'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Replaces string", - example: "[abac abac abac] | dfr into-df | dfr replace-all --pattern a --replace A", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("AbAc"), - Value::test_string("AbAc"), - Value::test_string("AbAc"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let pattern: String = call - .get_flag(engine_state, stack, "pattern")? - .expect("required value"); - let replace: String = call - .get_flag(engine_state, stack, "replace")? - .expect("required value"); - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "Error conversion to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - let mut res = - chunked - .replace_all(&pattern, &replace) - .map_err(|e| ShellError::GenericError { - error: "Error finding pattern other".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })?; - - res.rename(series.name()); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ReplaceAll {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs deleted file mode 100644 index 6889cef387..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_lengths.rs +++ /dev/null @@ -1,87 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct StrLengths; - -impl Command for StrLengths { - fn name(&self) -> &str { - "dfr str-lengths" - } - - fn usage(&self) -> &str { - "Get lengths of all strings." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Returns string lengths", - example: "[a ab abc] | dfr into-df | dfr str-lengths", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![Value::test_int(1), Value::test_int(2), Value::test_int(3)], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-lengths command can only be used with string columns".into()), - inner: vec![], - })?; - - let res = chunked.as_ref().str_len_bytes().into_series(); - - NuDataFrame::try_from_series(vec![res], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(StrLengths {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs deleted file mode 100644 index 6a5c8364c2..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/str_slice.rs +++ /dev/null @@ -1,136 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::{ - prelude::{IntoSeries, NamedFrom, StringNameSpaceImpl}, - series::Series, -}; - -#[derive(Clone)] -pub struct StrSlice; - -impl Command for StrSlice { - fn name(&self) -> &str { - "dfr str-slice" - } - - fn usage(&self) -> &str { - "Slices the string from the start position until the selected length." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("start", SyntaxShape::Int, "start of slice") - .named("length", SyntaxShape::Int, "optional length", Some('l')) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Creates slices from the strings", - example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1 --length 2", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("bc"), - Value::test_string("bc"), - Value::test_string("bc"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates slices from the strings without length", - example: "[abcded abc321 abc123] | dfr into-df | dfr str-slice 1", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("bcded"), - Value::test_string("bc321"), - Value::test_string("bc123"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let start: i64 = call.req(engine_state, stack, 0)?; - let start = Series::new("", &[start]); - - let length: Option = call.get_flag(engine_state, stack, "length")?; - let length = match length { - Some(v) => Series::new("", &[v as u64]), - None => Series::new_null("", 1), - }; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let chunked = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - let res = chunked - .str_slice(&start, &length) - .map_err(|e| ShellError::GenericError { - error: "Dataframe Error".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .with_name(series.name()); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(StrSlice {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs deleted file mode 100644 index 3cdfa84f8e..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/strftime.rs +++ /dev/null @@ -1,105 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; - -use polars::prelude::IntoSeries; - -#[derive(Clone)] -pub struct StrFTime; - -impl Command for StrFTime { - fn name(&self) -> &str { - "dfr strftime" - } - - fn usage(&self) -> &str { - "Formats date based on string rule." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .required("fmt", SyntaxShape::String, "Format rule") - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Formats date", - example: r#"let dt = ('2020-08-04T16:39:18+00:00' | into datetime --timezone 'UTC'); - let df = ([$dt $dt] | dfr into-df); - $df | dfr strftime "%Y/%m/%d""#, - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("2020/08/04"), - Value::test_string("2020/08/04"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let fmt: String = call.req(engine_state, stack, 0)?; - - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting to date".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - let res = casted - .strftime(&fmt) - .map_err(|e| ShellError::GenericError { - error: "Error formatting datetime".into(), - msg: e.to_string(), - span: Some(call.head), - help: None, - inner: vec![], - })? - .into_series(); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(explore_refactor_IntoDatetime)] -mod test { - use super::super::super::super::super::IntoDatetime; - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(StrFTime {}), Box::new(IntoDatetime {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs deleted file mode 100644 index 2340437e35..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_lowercase.rs +++ /dev/null @@ -1,92 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct ToLowerCase; - -impl Command for ToLowerCase { - fn name(&self) -> &str { - "dfr lowercase" - } - - fn usage(&self) -> &str { - "Lowercase the strings in the column." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Modifies strings to lowercase", - example: "[Abc aBc abC] | dfr into-df | dfr lowercase", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("abc"), - Value::test_string("abc"), - Value::test_string("abc"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - let mut res = casted.to_lowercase(); - res.rename(series.name()); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ToLowerCase {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs b/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs deleted file mode 100644 index 23378f5dc3..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/string/to_uppercase.rs +++ /dev/null @@ -1,96 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::{IntoSeries, StringNameSpaceImpl}; - -#[derive(Clone)] -pub struct ToUpperCase; - -impl Command for ToUpperCase { - fn name(&self) -> &str { - "dfr uppercase" - } - - fn usage(&self) -> &str { - "Uppercase the strings in the column." 
- } - - fn search_terms(&self) -> Vec<&str> { - vec!["capitalize, caps, capital"] - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Modifies strings to uppercase", - example: "[Abc aBc abC] | dfr into-df | dfr uppercase", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new( - "0".to_string(), - vec![ - Value::test_string("ABC"), - Value::test_string("ABC"), - Value::test_string("ABC"), - ], - )], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let casted = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting to string".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - let mut res = casted.to_uppercase(); - res.rename(series.name()); - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -#[cfg(test)] -mod test { - use super::super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ToUpperCase {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs b/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs deleted file mode 100644 index 1bc2e0dc1b..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/unique.rs +++ /dev/null @@ -1,146 +0,0 @@ -use crate::dataframe::{ - utils::extract_strings, - values::{Column, NuDataFrame, NuLazyFrame}, -}; -use nu_engine::command_prelude::*; - -use polars::prelude::{IntoSeries, UniqueKeepStrategy}; - -#[derive(Clone)] -pub struct Unique; - -impl Command for Unique { - fn name(&self) -> &str { - "dfr unique" - } - - fn usage(&self) -> &str { - "Returns unique values from a dataframe." - } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .named( - "subset", - SyntaxShape::Any, - "Subset of column(s) to use to maintain rows (lazy df)", - Some('s'), - ) - .switch( - "last", - "Keeps last unique value. 
Default keeps first value (lazy df)", - Some('l'), - ) - .switch( - "maintain-order", - "Keep the same order as the original DataFrame (lazy df)", - Some('k'), - ) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe or lazyframe".into())) - } - - fn examples(&self) -> Vec { - vec![ - Example { - description: "Returns unique values from a series", - example: "[2 2 2 2 2] | dfr into-df | dfr unique", - result: Some( - NuDataFrame::try_from_columns( - vec![Column::new("0".to_string(), vec![Value::test_int(2)])], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }, - Example { - description: "Creates a is unique expression from a column", - example: "col a | unique", - result: None, - }, - ] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - let value = input.into_value(call.head)?; - if NuLazyFrame::can_downcast(&value) { - let df = NuLazyFrame::try_from_value(value)?; - command_lazy(engine_state, stack, call, df) - } else { - let df = NuDataFrame::try_from_value(value)?; - command_eager(engine_state, stack, call, df) - } - } -} - -fn command_eager( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - df: NuDataFrame, -) -> Result { - let series = df.as_series(call.head)?; - - let res = series.unique().map_err(|e| ShellError::GenericError { - error: "Error calculating unique values".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - NuDataFrame::try_from_series(vec![res.into_series()], call.head) - .map(|df| PipelineData::Value(NuDataFrame::into_value(df, call.head), None)) -} - -fn command_lazy( - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - lazy: NuLazyFrame, -) -> Result { - let last = call.has_flag(engine_state, stack, "last")?; - let maintain = call.has_flag(engine_state, stack, "maintain-order")?; - - let subset: Option = call.get_flag(engine_state, stack, "subset")?; - let subset = match subset { - Some(value) => Some(extract_strings(value)?), - None => None, - }; - - let strategy = if last { - UniqueKeepStrategy::Last - } else { - UniqueKeepStrategy::First - }; - - let lazy = lazy.into_polars(); - let lazy: NuLazyFrame = if maintain { - lazy.unique(subset, strategy).into() - } else { - lazy.unique_stable(subset, strategy).into() - }; - - Ok(PipelineData::Value(lazy.into_value(call.head)?, None)) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(Unique {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs b/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs deleted file mode 100644 index 87d3b42b3a..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/series/value_counts.rs +++ /dev/null @@ -1,95 +0,0 @@ -use crate::dataframe::values::{Column, NuDataFrame}; -use nu_engine::command_prelude::*; -use polars::prelude::SeriesMethods; - -#[derive(Clone)] -pub struct ValueCount; - -impl Command for ValueCount { - fn name(&self) -> &str { - "dfr value-counts" - } - - fn usage(&self) -> &str { - "Returns a dataframe with the counts for unique values in series." 
- } - - fn signature(&self) -> Signature { - Signature::build(self.name()) - .input_output_type( - Type::Custom("dataframe".into()), - Type::Custom("dataframe".into()), - ) - .category(Category::Custom("dataframe".into())) - } - - fn examples(&self) -> Vec { - vec![Example { - description: "Calculates value counts", - example: "[5 5 5 5 6 6] | dfr into-df | dfr value-counts", - result: Some( - NuDataFrame::try_from_columns( - vec![ - Column::new( - "0".to_string(), - vec![Value::test_int(5), Value::test_int(6)], - ), - Column::new( - "count".to_string(), - vec![Value::test_int(4), Value::test_int(2)], - ), - ], - None, - ) - .expect("simple df for test should not fail") - .into_value(Span::test_data()), - ), - }] - } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - input: PipelineData, - ) -> Result { - command(engine_state, stack, call, input) - } -} - -fn command( - _engine_state: &EngineState, - _stack: &mut Stack, - call: &Call, - input: PipelineData, -) -> Result { - let df = NuDataFrame::try_from_pipeline(input, call.head)?; - let series = df.as_series(call.head)?; - - let res = series - .value_counts(false, false) - .map_err(|e| ShellError::GenericError { - error: "Error calculating value counts values".into(), - msg: e.to_string(), - span: Some(call.head), - help: Some("The str-slice command can only be used with string columns".into()), - inner: vec![], - })?; - - Ok(PipelineData::Value( - NuDataFrame::dataframe_into_value(res, call.head), - None, - )) -} - -#[cfg(test)] -mod test { - use super::super::super::test_dataframe::test_dataframe; - use super::*; - - #[test] - fn test_examples() { - test_dataframe(vec![Box::new(ValueCount {})]) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/stub.rs b/crates/nu-cmd-dataframe/src/dataframe/stub.rs deleted file mode 100644 index dfabbe0b82..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/stub.rs +++ /dev/null @@ -1,34 +0,0 @@ -use nu_engine::{command_prelude::*, get_full_help}; - -#[derive(Clone)] -pub struct Dfr; - -impl Command for Dfr { - fn name(&self) -> &str { - "dfr" - } - - fn usage(&self) -> &str { - "Operate with data in a dataframe format." - } - - fn signature(&self) -> nu_protocol::Signature { - Signature::build("dfr") - .category(Category::Custom("dataframe".into())) - .input_output_types(vec![(Type::Nothing, Type::String)]) - } - - fn extra_usage(&self) -> &str { - "You must use one of the following subcommands. Using this command as-is will only produce this help message." 
- } - - fn run( - &self, - engine_state: &EngineState, - stack: &mut Stack, - call: &Call, - _input: PipelineData, - ) -> Result { - Ok(Value::string(get_full_help(self, engine_state, stack), call.head).into_pipeline_data()) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs b/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs deleted file mode 100644 index 39c30be9dd..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/test_dataframe.rs +++ /dev/null @@ -1,98 +0,0 @@ -use super::{ - eager::{SchemaDF, ToDataFrame}, - expressions::ExprCol, - lazy::{LazyCollect, LazyFillNull, ToLazyFrame}, -}; -use nu_cmd_lang::Let; -use nu_engine::{command_prelude::*, eval_block}; -use nu_parser::parse; -use nu_protocol::{debugger::WithoutDebug, engine::StateWorkingSet}; - -pub fn test_dataframe(cmds: Vec>) { - if cmds.is_empty() { - panic!("Empty commands vector") - } - - // The first element in the cmds vector must be the one tested - let examples = cmds[0].examples(); - let mut engine_state = build_test_engine_state(cmds.clone()); - - for example in examples { - test_dataframe_example(&mut engine_state, &example); - } -} - -pub fn build_test_engine_state(cmds: Vec>) -> Box { - let mut engine_state = Box::new(EngineState::new()); - - let delta = { - // Base functions that are needed for testing - // Try to keep this working set small to keep tests running as fast as possible - let mut working_set = StateWorkingSet::new(&engine_state); - working_set.add_decl(Box::new(Let)); - working_set.add_decl(Box::new(ToDataFrame)); - working_set.add_decl(Box::new(ToLazyFrame)); - working_set.add_decl(Box::new(LazyCollect)); - working_set.add_decl(Box::new(ExprCol)); - working_set.add_decl(Box::new(SchemaDF)); - working_set.add_decl(Box::new(LazyFillNull)); - - // Adding the command that is being tested to the working set - for cmd in cmds.clone() { - working_set.add_decl(cmd); - } - - working_set.render() - }; - - engine_state - .merge_delta(delta) - .expect("Error merging delta"); - - engine_state -} - -pub fn test_dataframe_example(engine_state: &mut Box, example: &Example) { - // Skip tests that don't have results to compare to - if example.result.is_none() { - return; - } - - let start = std::time::Instant::now(); - - let (block, delta) = { - let mut working_set = StateWorkingSet::new(engine_state); - let output = parse(&mut working_set, None, example.example.as_bytes(), false); - - if let Some(err) = working_set.parse_errors.first() { - panic!("test parse error in `{}`: {:?}", example.example, err) - } - - (output, working_set.render()) - }; - - engine_state - .merge_delta(delta) - .expect("Error merging delta"); - - let mut stack = Stack::new().capture(); - - let result = - eval_block::(engine_state, &mut stack, &block, PipelineData::empty()) - .unwrap_or_else(|err| panic!("test eval error in `{}`: {:?}", example.example, err)) - .into_value(Span::test_data()) - .expect("ok value"); - - println!("input: {}", example.example); - println!("result: {result:?}"); - println!("done: {:?}", start.elapsed()); - - // Note. 
Value implements PartialEq for Bool, Int, Float, String and Block - // If the command you are testing requires to compare another case, then - // you need to define its equality in the Value struct - if let Some(expected) = example.result.clone() { - if result != expected { - panic!("the example result is different to expected value: {result:?} != {expected:?}") - } - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/utils.rs b/crates/nu-cmd-dataframe/src/dataframe/utils.rs deleted file mode 100644 index db99d550a9..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/utils.rs +++ /dev/null @@ -1,16 +0,0 @@ -use nu_protocol::{FromValue, ShellError, Value}; - -pub fn extract_strings(value: Value) -> Result, ShellError> { - let span = value.span(); - match ( - ::from_value(value.clone()), - as FromValue>::from_value(value), - ) { - (Ok(col), Err(_)) => Ok(vec![col]), - (Err(_), Ok(cols)) => Ok(cols), - _ => Err(ShellError::IncompatibleParametersSingle { - msg: "Expected a string or list of strings".into(), - span, - }), - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs deleted file mode 100644 index eaed15aa4b..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -mod nu_dataframe; -mod nu_expression; -mod nu_lazyframe; -mod nu_lazygroupby; -mod nu_schema; -mod nu_when; -pub mod utils; - -pub use nu_dataframe::{Axis, Column, NuDataFrame}; -pub use nu_expression::NuExpression; -pub use nu_lazyframe::NuLazyFrame; -pub use nu_lazygroupby::NuLazyGroupBy; -pub use nu_schema::{str_to_dtype, NuSchema}; -pub use nu_when::NuWhen; diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs deleted file mode 100644 index 74a484825a..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/between_values.rs +++ /dev/null @@ -1,884 +0,0 @@ -use super::{operations::Axis, NuDataFrame}; -use nu_protocol::{ - ast::{Boolean, Comparison, Math, Operator}, - ShellError, Span, Spanned, Value, -}; -use num::Zero; -use polars::prelude::{ - BooleanType, ChunkCompare, ChunkedArray, DataType, Float64Type, Int64Type, IntoSeries, - NumOpsDispatchChecked, PolarsError, Series, StringNameSpaceImpl, -}; -use std::ops::{Add, BitAnd, BitOr, Div, Mul, Sub}; - -pub(super) fn between_dataframes( - operator: Spanned, - left: &Value, - lhs: &NuDataFrame, - right: &Value, - rhs: &NuDataFrame, -) -> Result { - let operation_span = Span::merge(left.span(), right.span()); - match operator.item { - Operator::Math(Math::Plus) => match lhs.append_df(rhs, Axis::Row, operation_span) { - Ok(df) => Ok(df.into_value(operation_span)), - Err(e) => Err(e), - }, - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - } -} - -pub(super) fn compute_between_series( - operator: Spanned, - left: &Value, - lhs: &Series, - right: &Value, - rhs: &Series, -) -> Result { - let operation_span = Span::merge(left.span(), right.span()); - match operator.item { - Operator::Math(Math::Plus) => { - let mut res = lhs + rhs; - let name = format!("sum_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Math(Math::Minus) => { - let mut res = lhs - rhs; - let name = format!("sub_{}_{}", lhs.name(), 
rhs.name()); - res.rename(&name); - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Math(Math::Multiply) => { - let mut res = lhs * rhs; - let name = format!("mul_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Math(Math::Divide) => { - let res = lhs.checked_div(rhs); - match res { - Ok(mut res) => { - let name = format!("div_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); - NuDataFrame::series_to_value(res, operation_span) - } - Err(e) => Err(ShellError::GenericError { - error: "Division error".into(), - msg: e.to_string(), - span: Some(right.span()), - help: None, - inner: vec![], - }), - } - } - Operator::Comparison(Comparison::Equal) => { - let name = format!("eq_{}_{}", lhs.name(), rhs.name()); - let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::equal)?; - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Comparison(Comparison::NotEqual) => { - let name = format!("neq_{}_{}", lhs.name(), rhs.name()); - let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::not_equal)?; - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Comparison(Comparison::LessThan) => { - let name = format!("lt_{}_{}", lhs.name(), rhs.name()); - let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::lt)?; - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Comparison(Comparison::LessThanOrEqual) => { - let name = format!("lte_{}_{}", lhs.name(), rhs.name()); - let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::lt_eq)?; - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Comparison(Comparison::GreaterThan) => { - let name = format!("gt_{}_{}", lhs.name(), rhs.name()); - let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::gt)?; - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Comparison(Comparison::GreaterThanOrEqual) => { - let name = format!("gte_{}_{}", lhs.name(), rhs.name()); - let res = compare_series(lhs, rhs, name.as_str(), right.span(), Series::gt_eq)?; - NuDataFrame::series_to_value(res, operation_span) - } - Operator::Boolean(Boolean::And) => match lhs.dtype() { - DataType::Boolean => { - let lhs_cast = lhs.bool(); - let rhs_cast = rhs.bool(); - - match (lhs_cast, rhs_cast) { - (Ok(l), Ok(r)) => { - let mut res = l.bitand(r).into_series(); - let name = format!("and_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); - NuDataFrame::series_to_value(res, operation_span) - } - _ => Err(ShellError::GenericError { - error: "Incompatible types".into(), - msg: "unable to cast to boolean".into(), - span: Some(right.span()), - help: None, - inner: vec![], - }), - } - } - _ => Err(ShellError::IncompatibleParametersSingle { - msg: format!( - "Operation {} can only be done with boolean values", - operator.item - ), - span: operation_span, - }), - }, - Operator::Boolean(Boolean::Or) => match lhs.dtype() { - DataType::Boolean => { - let lhs_cast = lhs.bool(); - let rhs_cast = rhs.bool(); - - match (lhs_cast, rhs_cast) { - (Ok(l), Ok(r)) => { - let mut res = l.bitor(r).into_series(); - let name = format!("or_{}_{}", lhs.name(), rhs.name()); - res.rename(&name); - NuDataFrame::series_to_value(res, operation_span) - } - _ => Err(ShellError::GenericError { - error: "Incompatible types".into(), - msg: "unable to cast to boolean".into(), - span: Some(right.span()), - help: None, - inner: vec![], - }), - } - } - _ => 
Err(ShellError::IncompatibleParametersSingle { - msg: format!( - "Operation {} can only be done with boolean values", - operator.item - ), - span: operation_span, - }), - }, - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - } -} - -fn compare_series<'s, F>( - lhs: &'s Series, - rhs: &'s Series, - name: &'s str, - span: Span, - f: F, -) -> Result -where - F: Fn(&'s Series, &'s Series) -> Result, PolarsError>, -{ - let mut res = f(lhs, rhs) - .map_err(|e| ShellError::GenericError { - error: "Equality error".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })? - .into_series(); - - res.rename(name); - Ok(res) -} - -pub(super) fn compute_series_single_value( - operator: Spanned, - left: &Value, - lhs: &NuDataFrame, - right: &Value, -) -> Result { - if !lhs.is_series() { - return Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }); - } - - let lhs_span = left.span(); - let lhs = lhs.as_series(lhs_span)?; - - match operator.item { - Operator::Math(Math::Plus) => match &right { - Value::Int { val, .. } => { - compute_series_i64(&lhs, *val, >::add, lhs_span) - } - Value::Float { val, .. } => { - compute_series_float(&lhs, *val, >::add, lhs_span) - } - Value::String { val, .. } => add_string_to_series(&lhs, val, lhs_span), - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Math(Math::Minus) => match &right { - Value::Int { val, .. } => { - compute_series_i64(&lhs, *val, >::sub, lhs_span) - } - Value::Float { val, .. } => { - compute_series_float(&lhs, *val, >::sub, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Math(Math::Multiply) => match &right { - Value::Int { val, .. } => { - compute_series_i64(&lhs, *val, >::mul, lhs_span) - } - Value::Float { val, .. } => { - compute_series_float(&lhs, *val, >::mul, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Math(Math::Divide) => { - let span = right.span(); - match &right { - Value::Int { val, .. } => { - if *val == 0 { - Err(ShellError::DivisionByZero { span }) - } else { - compute_series_i64(&lhs, *val, >::div, lhs_span) - } - } - Value::Float { val, .. } => { - if val.is_zero() { - Err(ShellError::DivisionByZero { span }) - } else { - compute_series_float(&lhs, *val, >::div, lhs_span) - } - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - } - } - Operator::Comparison(Comparison::Equal) => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::equal, lhs_span), - Value::Float { val, .. } => { - compare_series_float(&lhs, *val, ChunkedArray::equal, lhs_span) - } - Value::String { val, .. 
} => { - let equal_pattern = format!("^{}$", fancy_regex::escape(val)); - contains_series_pat(&lhs, &equal_pattern, lhs_span) - } - Value::Date { val, .. } => { - compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::equal, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::NotEqual) => match &right { - Value::Int { val, .. } => { - compare_series_i64(&lhs, *val, ChunkedArray::not_equal, lhs_span) - } - Value::Float { val, .. } => { - compare_series_float(&lhs, *val, ChunkedArray::not_equal, lhs_span) - } - Value::Date { val, .. } => compare_series_i64( - &lhs, - val.timestamp_millis(), - ChunkedArray::not_equal, - lhs_span, - ), - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::LessThan) => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::lt, lhs_span), - Value::Float { val, .. } => { - compare_series_float(&lhs, *val, ChunkedArray::lt, lhs_span) - } - Value::Date { val, .. } => { - compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::LessThanOrEqual) => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::lt_eq, lhs_span), - Value::Float { val, .. } => { - compare_series_float(&lhs, *val, ChunkedArray::lt_eq, lhs_span) - } - Value::Date { val, .. } => { - compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::lt_eq, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::GreaterThan) => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::gt, lhs_span), - Value::Float { val, .. } => { - compare_series_float(&lhs, *val, ChunkedArray::gt, lhs_span) - } - Value::Date { val, .. } => { - compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::GreaterThanOrEqual) => match &right { - Value::Int { val, .. } => compare_series_i64(&lhs, *val, ChunkedArray::gt_eq, lhs_span), - Value::Float { val, .. } => { - compare_series_float(&lhs, *val, ChunkedArray::gt_eq, lhs_span) - } - Value::Date { val, .. } => { - compare_series_i64(&lhs, val.timestamp_millis(), ChunkedArray::gt_eq, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - // TODO: update this to do a regex match instead of a simple contains? 
- Operator::Comparison(Comparison::RegexMatch) => match &right { - Value::String { val, .. } => contains_series_pat(&lhs, val, lhs_span), - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::StartsWith) => match &right { - Value::String { val, .. } => { - let starts_with_pattern = format!("^{}", fancy_regex::escape(val)); - contains_series_pat(&lhs, &starts_with_pattern, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - Operator::Comparison(Comparison::EndsWith) => match &right { - Value::String { val, .. } => { - let ends_with_pattern = format!("{}$", fancy_regex::escape(val)); - contains_series_pat(&lhs, &ends_with_pattern, lhs_span) - } - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - }, - _ => Err(ShellError::OperatorMismatch { - op_span: operator.span, - lhs_ty: left.get_type().to_string(), - lhs_span: left.span(), - rhs_ty: right.get_type().to_string(), - rhs_span: right.span(), - }), - } -} - -fn compute_series_i64(series: &Series, val: i64, f: F, span: Span) -> Result -where - F: Fn(ChunkedArray, i64) -> ChunkedArray, -{ - match series.dtype() { - DataType::UInt32 | DataType::Int32 | DataType::UInt64 => { - let to_i64 = series.cast(&DataType::Int64); - - match to_i64 { - Ok(series) => { - let casted = series.i64(); - compute_casted_i64(casted, val, f, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to i64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - DataType::Int64 => { - let casted = series.i64(); - compute_casted_i64(casted, val, f, span) - } - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: format!( - "Series of type {} can not be used for operations with an i64 value", - series.dtype() - ), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compute_casted_i64( - casted: Result<&ChunkedArray, PolarsError>, - val: i64, - f: F, - span: Span, -) -> Result -where - F: Fn(ChunkedArray, i64) -> ChunkedArray, -{ - match casted { - Ok(casted) => { - let res = f(casted.clone(), val); - let res = res.into_series(); - NuDataFrame::series_to_value(res, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to i64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compute_series_float(series: &Series, val: f64, f: F, span: Span) -> Result -where - F: Fn(ChunkedArray, f64) -> ChunkedArray, -{ - match series.dtype() { - DataType::Float32 => { - let to_f64 = series.cast(&DataType::Float64); - - match to_f64 { - Ok(series) => { - let casted = series.f64(); - compute_casted_f64(casted, val, f, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - DataType::Float64 => { - let casted = series.f64(); - compute_casted_f64(casted, val, f, span) - } - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: format!( - "Series of type {} can 
not be used for operations with a float value", - series.dtype() - ), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compute_casted_f64( - casted: Result<&ChunkedArray, PolarsError>, - val: f64, - f: F, - span: Span, -) -> Result -where - F: Fn(ChunkedArray, f64) -> ChunkedArray, -{ - match casted { - Ok(casted) => { - let res = f(casted.clone(), val); - let res = res.into_series(); - NuDataFrame::series_to_value(res, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compare_series_i64(series: &Series, val: i64, f: F, span: Span) -> Result -where - F: Fn(&ChunkedArray, i64) -> ChunkedArray, -{ - match series.dtype() { - DataType::UInt32 | DataType::Int32 | DataType::UInt64 | DataType::Datetime(_, _) => { - let to_i64 = series.cast(&DataType::Int64); - - match to_i64 { - Ok(series) => { - let casted = series.i64(); - compare_casted_i64(casted, val, f, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - DataType::Date => { - let to_i64 = series.cast(&DataType::Int64); - - match to_i64 { - Ok(series) => { - let nanosecs_per_day: i64 = 24 * 60 * 60 * 1_000_000_000; - let casted = series - .i64() - .map(|chunked| chunked.mul(nanosecs_per_day)) - .expect("already checked for casting"); - compare_casted_i64(Ok(&casted), val, f, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - DataType::Int64 => { - let casted = series.i64(); - compare_casted_i64(casted, val, f, span) - } - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: format!( - "Series of type {} can not be used for operations with an i64 value", - series.dtype() - ), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compare_casted_i64( - casted: Result<&ChunkedArray, PolarsError>, - val: i64, - f: F, - span: Span, -) -> Result -where - F: Fn(&ChunkedArray, i64) -> ChunkedArray, -{ - match casted { - Ok(casted) => { - let res = f(casted, val); - let res = res.into_series(); - NuDataFrame::series_to_value(res, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to i64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compare_series_float(series: &Series, val: f64, f: F, span: Span) -> Result -where - F: Fn(&ChunkedArray, f64) -> ChunkedArray, -{ - match series.dtype() { - DataType::Float32 => { - let to_f64 = series.cast(&DataType::Float64); - - match to_f64 { - Ok(series) => { - let casted = series.f64(); - compare_casted_f64(casted, val, f, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to i64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - DataType::Float64 => { - let casted = series.f64(); - compare_casted_f64(casted, val, f, span) - } - _ => Err(ShellError::GenericError { - error: "Incorrect type".into(), - msg: format!( - "Series of type {} can not be used for operations with a float value", - series.dtype() - ), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn compare_casted_f64( - casted: Result<&ChunkedArray, PolarsError>, - val: f64, - f: F, - span: Span, -) -> Result -where - F: 
Fn(&ChunkedArray, f64) -> ChunkedArray, -{ - match casted { - Ok(casted) => { - let res = f(casted, val); - let res = res.into_series(); - NuDataFrame::series_to_value(res, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to f64".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn contains_series_pat(series: &Series, pat: &str, span: Span) -> Result { - let casted = series.str(); - match casted { - Ok(casted) => { - let res = casted.contains(pat, false); - - match res { - Ok(res) => { - let res = res.into_series(); - NuDataFrame::series_to_value(res, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Error using contains".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to string".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn add_string_to_series(series: &Series, pat: &str, span: Span) -> Result { - let casted = series.str(); - match casted { - Ok(casted) => { - let res = casted + pat; - let res = res.into_series(); - - NuDataFrame::series_to_value(res, span) - } - Err(e) => Err(ShellError::GenericError { - error: "Unable to cast to string".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -#[cfg(test)] -mod test { - use super::*; - use nu_protocol::Span; - use polars::{prelude::NamedFrom, series::Series}; - - use crate::dataframe::values::NuDataFrame; - - #[test] - fn test_compute_between_series_comparisons() { - let series = Series::new("c", &[1, 2]); - let df = NuDataFrame::try_from_series(vec![series], Span::test_data()) - .expect("should be able to create a simple dataframe"); - - let c0 = df - .column("c", Span::test_data()) - .expect("should be able to get column c"); - - let c0_series = c0 - .as_series(Span::test_data()) - .expect("should be able to get series"); - - let c0_value = c0.into_value(Span::test_data()); - - let c1 = df - .column("c", Span::test_data()) - .expect("should be able to get column c"); - - let c1_series = c1 - .as_series(Span::test_data()) - .expect("should be able to get series"); - - let c1_value = c1.into_value(Span::test_data()); - - let op = Spanned { - item: Operator::Comparison(Comparison::NotEqual), - span: Span::test_data(), - }; - let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) - .expect("compare should not fail"); - let result = NuDataFrame::try_from_value(result) - .expect("should be able to create a dataframe from a value"); - let result = result - .as_series(Span::test_data()) - .expect("should be convert to a series"); - assert_eq!(result, Series::new("neq_c_c", &[false, false])); - - let op = Spanned { - item: Operator::Comparison(Comparison::Equal), - span: Span::test_data(), - }; - let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) - .expect("compare should not fail"); - let result = NuDataFrame::try_from_value(result) - .expect("should be able to create a dataframe from a value"); - let result = result - .as_series(Span::test_data()) - .expect("should be convert to a series"); - assert_eq!(result, Series::new("eq_c_c", &[true, true])); - - let op = Spanned { - item: Operator::Comparison(Comparison::LessThan), - span: Span::test_data(), - }; - let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) - .expect("compare should not fail"); - let 
result = NuDataFrame::try_from_value(result) - .expect("should be able to create a dataframe from a value"); - let result = result - .as_series(Span::test_data()) - .expect("should be convert to a series"); - assert_eq!(result, Series::new("lt_c_c", &[false, false])); - - let op = Spanned { - item: Operator::Comparison(Comparison::LessThanOrEqual), - span: Span::test_data(), - }; - let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) - .expect("compare should not fail"); - let result = NuDataFrame::try_from_value(result) - .expect("should be able to create a dataframe from a value"); - let result = result - .as_series(Span::test_data()) - .expect("should be convert to a series"); - assert_eq!(result, Series::new("lte_c_c", &[true, true])); - - let op = Spanned { - item: Operator::Comparison(Comparison::GreaterThan), - span: Span::test_data(), - }; - let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) - .expect("compare should not fail"); - let result = NuDataFrame::try_from_value(result) - .expect("should be able to create a dataframe from a value"); - let result = result - .as_series(Span::test_data()) - .expect("should be convert to a series"); - assert_eq!(result, Series::new("gt_c_c", &[false, false])); - - let op = Spanned { - item: Operator::Comparison(Comparison::GreaterThanOrEqual), - span: Span::test_data(), - }; - let result = compute_between_series(op, &c0_value, &c0_series, &c1_value, &c1_series) - .expect("compare should not fail"); - let result = NuDataFrame::try_from_value(result) - .expect("should be able to create a dataframe from a value"); - let result = result - .as_series(Span::test_data()) - .expect("should be convert to a series"); - assert_eq!(result, Series::new("gte_c_c", &[true, true])); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs deleted file mode 100644 index 7ab339d78d..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/conversion.rs +++ /dev/null @@ -1,1435 +0,0 @@ -use super::{DataFrameValue, NuDataFrame, NuSchema}; -use chrono::{DateTime, Duration, FixedOffset, NaiveTime, TimeZone, Utc}; -use chrono_tz::Tz; -use indexmap::map::{Entry, IndexMap}; -use nu_protocol::{Record, ShellError, Span, Value}; -use polars::{ - chunked_array::{ - builder::AnonymousOwnedListBuilder, object::builder::ObjectChunkedBuilder, ChunkedArray, - }, - datatypes::AnyValue, - export::arrow::Either, - prelude::{ - DataFrame, DataType, DatetimeChunked, Float32Type, Float64Type, Int16Type, Int32Type, - Int64Type, Int8Type, IntoSeries, ListBooleanChunkedBuilder, ListBuilderTrait, - ListPrimitiveChunkedBuilder, ListStringChunkedBuilder, ListType, NamedFrom, - NewChunkedArray, ObjectType, Schema, Series, StructChunked, TemporalMethods, TimeUnit, - UInt16Type, UInt32Type, UInt64Type, UInt8Type, - }, -}; -use std::ops::{Deref, DerefMut}; - -const NANOS_PER_DAY: i64 = 86_400_000_000_000; - -// The values capacity is for the size of an vec. -// Since this is impossible to determine without traversing every value -// I just picked one. Since this is for converting back and forth -// between nushell tables the values shouldn't be too extremely large for -// practical reasons (~ a few thousand rows). -const VALUES_CAPACITY: usize = 10; - -macro_rules! 
value_to_primitive { - ($value:ident, u8) => { - $value.as_i64().map(|v| v as u8) - }; - ($value:ident, u16) => { - $value.as_i64().map(|v| v as u16) - }; - ($value:ident, u32) => { - $value.as_i64().map(|v| v as u32) - }; - ($value:ident, u64) => { - $value.as_i64().map(|v| v as u64) - }; - ($value:ident, i8) => { - $value.as_i64().map(|v| v as i8) - }; - ($value:ident, i16) => { - $value.as_i64().map(|v| v as i16) - }; - ($value:ident, i32) => { - $value.as_i64().map(|v| v as i32) - }; - ($value:ident, i64) => { - $value.as_i64() - }; - ($value:ident, f32) => { - $value.as_f64().map(|v| v as f32) - }; - ($value:ident, f64) => { - $value.as_f64() - }; -} - -#[derive(Debug)] -pub struct Column { - name: String, - values: Vec, -} - -impl Column { - pub fn new(name: String, values: Vec) -> Self { - Self { name, values } - } - - pub fn new_empty(name: String) -> Self { - Self { - name, - values: Vec::new(), - } - } - - pub fn name(&self) -> &str { - self.name.as_str() - } -} - -impl IntoIterator for Column { - type Item = Value; - type IntoIter = std::vec::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.values.into_iter() - } -} - -impl Deref for Column { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.values - } -} - -impl DerefMut for Column { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.values - } -} - -#[derive(Debug)] -pub struct TypedColumn { - column: Column, - column_type: Option, -} - -impl TypedColumn { - fn new_empty(name: String) -> Self { - Self { - column: Column::new_empty(name), - column_type: None, - } - } -} - -impl Deref for TypedColumn { - type Target = Column; - - fn deref(&self) -> &Self::Target { - &self.column - } -} - -impl DerefMut for TypedColumn { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.column - } -} - -pub type ColumnMap = IndexMap; - -pub fn create_column( - series: &Series, - from_row: usize, - to_row: usize, - span: Span, -) -> Result { - let size = to_row - from_row; - let values = series_to_values(series, Some(from_row), Some(size), span)?; - Ok(Column::new(series.name().into(), values)) -} - -// Adds a separator to the vector of values using the column names from the -// dataframe to create the Values Row -pub fn add_separator(values: &mut Vec, df: &DataFrame, span: Span) { - let mut record = Record::new(); - - record.push("index", Value::string("...", span)); - - for name in df.get_column_names() { - record.push(name, Value::string("...", span)) - } - - values.push(Value::record(record, span)); -} - -// Inserting the values found in a Value::List or Value::Record -pub fn insert_record( - column_values: &mut ColumnMap, - record: Record, - maybe_schema: &Option, -) -> Result<(), ShellError> { - for (col, value) in record { - insert_value(value, col, column_values, maybe_schema)?; - } - - Ok(()) -} - -pub fn insert_value( - value: Value, - key: String, - column_values: &mut ColumnMap, - maybe_schema: &Option, -) -> Result<(), ShellError> { - let col_val = match column_values.entry(key.clone()) { - Entry::Vacant(entry) => entry.insert(TypedColumn::new_empty(key.clone())), - Entry::Occupied(entry) => entry.into_mut(), - }; - - // Checking that the type for the value is the same - // for the previous value in the column - if col_val.values.is_empty() { - if let Some(schema) = maybe_schema { - if let Some(field) = schema.schema.get_field(&key) { - col_val.column_type = Some(field.data_type().clone()); - } - } - - if col_val.column_type.is_none() { - col_val.column_type = 
Some(value_to_data_type(&value)); - } - - col_val.values.push(value); - } else { - let prev_value = &col_val.values[col_val.values.len() - 1]; - - match (&prev_value, &value) { - (Value::Int { .. }, Value::Int { .. }) - | (Value::Float { .. }, Value::Float { .. }) - | (Value::String { .. }, Value::String { .. }) - | (Value::Bool { .. }, Value::Bool { .. }) - | (Value::Date { .. }, Value::Date { .. }) - | (Value::Filesize { .. }, Value::Filesize { .. }) - | (Value::Duration { .. }, Value::Duration { .. }) => col_val.values.push(value), - (Value::List { .. }, _) => { - col_val.column_type = Some(value_to_data_type(&value)); - col_val.values.push(value); - } - _ => { - col_val.column_type = Some(DataType::Object("Value", None)); - col_val.values.push(value); - } - } - } - - Ok(()) -} - -fn value_to_data_type(value: &Value) -> DataType { - match &value { - Value::Int { .. } => DataType::Int64, - Value::Float { .. } => DataType::Float64, - Value::String { .. } => DataType::String, - Value::Bool { .. } => DataType::Boolean, - Value::Date { .. } => DataType::Date, - Value::Duration { .. } => DataType::Duration(TimeUnit::Nanoseconds), - Value::Filesize { .. } => DataType::Int64, - Value::List { vals, .. } => { - // We need to determined the type inside of the list. - // Since Value::List does not have any kind of - // type information, we need to look inside the list. - // This will cause errors if lists have inconsistent types. - // Basically, if a list column needs to be converted to dataframe, - // needs to have consistent types. - let list_type = vals - .iter() - .filter(|v| !matches!(v, Value::Nothing { .. })) - .map(value_to_data_type) - .nth(1) - .unwrap_or(DataType::Object("Value", None)); - - DataType::List(Box::new(list_type)) - } - _ => DataType::Object("Value", None), - } -} - -fn typed_column_to_series(name: &str, column: TypedColumn) -> Result { - if let Some(column_type) = &column.column_type { - match column_type { - DataType::Float32 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_f64().map(|v| v as f32)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::Float64 => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_f64()).collect(); - Ok(Series::new(name, series_values?)) - } - DataType::UInt8 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as u8)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::UInt16 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as u16)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::UInt32 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as u32)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::UInt64 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as u64)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::Int8 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as i8)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::Int16 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as i16)) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::Int32 => { - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| v as i32)) - .collect(); - 
Ok(Series::new(name, series_values?)) - } - DataType::Int64 => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_i64()).collect(); - Ok(Series::new(name, series_values?)) - } - DataType::Boolean => { - let series_values: Result, _> = - column.values.iter().map(|v| v.as_bool()).collect(); - Ok(Series::new(name, series_values?)) - } - DataType::String => { - let series_values: Result, _> = - column.values.iter().map(|v| v.coerce_string()).collect(); - Ok(Series::new(name, series_values?)) - } - DataType::Object(_, _) => value_to_series(name, &column.values), - DataType::Duration(time_unit) => { - //todo - finish type conversion - let series_values: Result, _> = column - .values - .iter() - .map(|v| v.as_i64().map(|v| nanos_from_timeunit(v, *time_unit))) - .collect(); - Ok(Series::new(name, series_values?)) - } - DataType::List(list_type) => { - match input_type_list_to_series(name, list_type.as_ref(), &column.values) { - Ok(series) => Ok(series), - Err(_) => { - // An error case will occur when there are lists of mixed types. - // If this happens, fallback to object list - input_type_list_to_series( - name, - &DataType::Object("unknown", None), - &column.values, - ) - } - } - } - DataType::Date => { - let it = column.values.iter().map(|v| { - if let Value::Date { val, .. } = &v { - Some(val.timestamp_nanos_opt().unwrap_or_default()) - } else { - None - } - }); - - let res: DatetimeChunked = ChunkedArray::::from_iter_options(name, it) - .into_datetime(TimeUnit::Nanoseconds, None); - - Ok(res.into_series()) - } - DataType::Datetime(tu, maybe_tz) => { - let dates = column - .values - .iter() - .map(|v| { - if let Value::Date { val, .. } = &v { - // If there is a timezone specified, make sure - // the value is converted to it - Ok(maybe_tz - .as_ref() - .map(|tz| tz.parse::().map(|tz| val.with_timezone(&tz))) - .transpose() - .map_err(|e| ShellError::GenericError { - error: "Error parsing timezone".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })? 
- .and_then(|dt| dt.timestamp_nanos_opt()) - .map(|nanos| nanos_from_timeunit(nanos, *tu))) - } else { - Ok(None) - } - }) - .collect::>, ShellError>>()?; - - let res: DatetimeChunked = - ChunkedArray::::from_iter_options(name, dates.into_iter()) - .into_datetime(*tu, maybe_tz.clone()); - - Ok(res.into_series()) - } - DataType::Struct(fields) => { - let schema = Some(NuSchema::new(Schema::from_iter(fields.clone()))); - let mut structs: Vec = Vec::new(); - - for v in column.values.iter() { - let mut column_values: ColumnMap = IndexMap::new(); - let record = v.as_record()?; - insert_record(&mut column_values, record.clone(), &schema)?; - let df = from_parsed_columns(column_values)?; - structs.push(df.as_series(Span::unknown())?); - } - - let chunked = StructChunked::new(column.name(), structs.as_ref()).map_err(|e| { - ShellError::GenericError { - error: format!("Error creating struct: {e}"), - msg: "".into(), - span: None, - help: None, - inner: vec![], - } - })?; - Ok(chunked.into_series()) - } - _ => Err(ShellError::GenericError { - error: format!("Error creating dataframe: Unsupported type: {column_type:?}"), - msg: "".into(), - span: None, - help: None, - inner: vec![], - }), - } - } else { - Err(ShellError::GenericError { - error: "Passed a type column with no type".into(), - msg: "".into(), - span: None, - help: None, - inner: vec![], - }) - } -} - -// The ColumnMap has the parsed data from the StreamInput -// This data can be used to create a Series object that can initialize -// the dataframe based on the type of data that is found -pub fn from_parsed_columns(column_values: ColumnMap) -> Result { - let mut df_series: Vec = Vec::new(); - for (name, column) in column_values { - let series = typed_column_to_series(&name, column)?; - df_series.push(series); - } - - DataFrame::new(df_series) - .map(|df| NuDataFrame::new(false, df)) - .map_err(|e| ShellError::GenericError { - error: "Error creating dataframe".into(), - msg: e.to_string(), - span: None, - help: None, - inner: vec![], - }) -} - -fn value_to_series(name: &str, values: &[Value]) -> Result { - let mut builder = ObjectChunkedBuilder::::new(name, values.len()); - - for v in values { - builder.append_value(DataFrameValue::new(v.clone())); - } - - let res = builder.finish(); - Ok(res.into_series()) -} - -fn input_type_list_to_series( - name: &str, - data_type: &DataType, - values: &[Value], -) -> Result { - let inconsistent_error = |_| ShellError::GenericError { - error: format!( - "column {name} contains a list with inconsistent types: Expecting: {data_type:?}" - ), - msg: "".into(), - span: None, - help: None, - inner: vec![], - }; - - macro_rules! primitive_list_series { - ($list_type:ty, $vec_type:tt) => {{ - let mut builder = ListPrimitiveChunkedBuilder::<$list_type>::new( - name, - values.len(), - VALUES_CAPACITY, - data_type.clone(), - ); - - for v in values { - let value_list = v - .as_list()? - .iter() - .map(|v| value_to_primitive!(v, $vec_type)) - .collect::, _>>() - .map_err(inconsistent_error)?; - builder.append_iter_values(value_list.iter().copied()); - } - let res = builder.finish(); - Ok(res.into_series()) - }}; - } - - match *data_type { - // list of boolean values - DataType::Boolean => { - let mut builder = ListBooleanChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); - for v in values { - let value_list = v - .as_list()? 
- .iter() - .map(|v| v.as_bool()) - .collect::, _>>() - .map_err(inconsistent_error)?; - builder.append_iter(value_list.iter().map(|v| Some(*v))); - } - let res = builder.finish(); - Ok(res.into_series()) - } - DataType::Duration(_) => primitive_list_series!(Int64Type, i64), - DataType::UInt8 => primitive_list_series!(UInt8Type, u8), - DataType::UInt16 => primitive_list_series!(UInt16Type, u16), - DataType::UInt32 => primitive_list_series!(UInt32Type, u32), - DataType::UInt64 => primitive_list_series!(UInt64Type, u64), - DataType::Int8 => primitive_list_series!(Int8Type, i8), - DataType::Int16 => primitive_list_series!(Int16Type, i16), - DataType::Int32 => primitive_list_series!(Int32Type, i32), - DataType::Int64 => primitive_list_series!(Int64Type, i64), - DataType::Float32 => primitive_list_series!(Float32Type, f32), - DataType::Float64 => primitive_list_series!(Float64Type, f64), - DataType::String => { - let mut builder = ListStringChunkedBuilder::new(name, values.len(), VALUES_CAPACITY); - for v in values { - let value_list = v - .as_list()? - .iter() - .map(|v| v.coerce_string()) - .collect::, _>>() - .map_err(inconsistent_error)?; - builder.append_values_iter(value_list.iter().map(AsRef::as_ref)); - } - let res = builder.finish(); - Ok(res.into_series()) - } - DataType::Date => { - let mut builder = AnonymousOwnedListBuilder::new( - name, - values.len(), - Some(DataType::Datetime(TimeUnit::Nanoseconds, None)), - ); - for (i, v) in values.iter().enumerate() { - let list_name = i.to_string(); - - let it = v.as_list()?.iter().map(|v| { - if let Value::Date { val, .. } = &v { - Some(val.timestamp_nanos_opt().unwrap_or_default()) - } else { - None - } - }); - let dt_chunked = ChunkedArray::::from_iter_options(&list_name, it) - .into_datetime(TimeUnit::Nanoseconds, None); - - builder - .append_series(&dt_chunked.into_series()) - .map_err(|e| ShellError::GenericError { - error: "Error appending to series".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })? - } - let res = builder.finish(); - Ok(res.into_series()) - } - DataType::List(ref sub_list_type) => { - Ok(input_type_list_to_series(name, sub_list_type, values)?) 
- } - // treat everything else as an object - _ => Ok(value_to_series(name, values)?), - } -} - -fn series_to_values( - series: &Series, - maybe_from_row: Option, - maybe_size: Option, - span: Span, -) -> Result, ShellError> { - match series.dtype() { - DataType::Null => { - let it = std::iter::repeat(Value::nothing(span)); - let values = if let Some(size) = maybe_size { - Either::Left(it.take(size)) - } else { - Either::Right(it) - } - .collect::>(); - - Ok(values) - } - DataType::UInt8 => { - let casted = series.u8().map_err(|e| ShellError::GenericError { - error: "Error casting column to u8".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::UInt16 => { - let casted = series.u16().map_err(|e| ShellError::GenericError { - error: "Error casting column to u16".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::UInt32 => { - let casted = series.u32().map_err(|e| ShellError::GenericError { - error: "Error casting column to u32".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::UInt64 => { - let casted = series.u64().map_err(|e| ShellError::GenericError { - error: "Error casting column to u64".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Int8 => { - let casted = series.i8().map_err(|e| ShellError::GenericError { - error: "Error casting column to i8".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Int16 => { - let casted = series.i16().map_err(|e| ShellError::GenericError { - error: "Error casting column to i16".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = 
(maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Int32 => { - let casted = series.i32().map_err(|e| ShellError::GenericError { - error: "Error casting column to i32".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a as i64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Int64 => { - let casted = series.i64().map_err(|e| ShellError::GenericError { - error: "Error casting column to i64".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::int(a, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Float32 => { - let casted = series.f32().map_err(|e| ShellError::GenericError { - error: "Error casting column to f32".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::float(a as f64, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Float64 => { - let casted = series.f64().map_err(|e| ShellError::GenericError { - error: "Error casting column to f64".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::float(a, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::Boolean => { - let casted = series.bool().map_err(|e| ShellError::GenericError { - error: "Error casting column to bool".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::bool(a, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - DataType::String => { - let casted = series.str().map_err(|e| ShellError::GenericError { - error: "Error casting column to string".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => Value::string(a.to_string(), span), - None => Value::nothing(span), - }) - .collect::>(); 
- - Ok(values) - } - DataType::Object(x, _) => { - let casted = series - .as_any() - .downcast_ref::>>(); - - match casted { - None => Err(ShellError::GenericError { - error: "Error casting object from series".into(), - msg: "".into(), - span: None, - help: Some(format!("Object not supported for conversion: {x}")), - inner: vec![], - }), - Some(ca) => { - let it = ca.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) - { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => a.get_value(), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - } - } - DataType::List(x) => { - let casted = series.as_any().downcast_ref::>(); - match casted { - None => Err(ShellError::GenericError { - error: "Error casting list from series".into(), - msg: "".into(), - span: None, - help: Some(format!("List not supported for conversion: {x}")), - inner: vec![], - }), - Some(ca) => { - let it = ca.into_iter(); - if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|ca| { - let sublist: Vec = if let Some(ref s) = ca { - series_to_values(s, None, None, Span::unknown())? - } else { - // empty item - vec![] - }; - Ok(Value::list(sublist, span)) - }) - .collect::, ShellError>>() - } - } - } - DataType::Date => { - let casted = series.date().map_err(|e| ShellError::GenericError { - error: "Error casting column to date".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => { - let nanos = nanos_per_day(a); - let datetime = datetime_from_epoch_nanos(nanos, &None, span)?; - Ok(Value::date(datetime, span)) - } - None => Ok(Value::nothing(span)), - }) - .collect::, ShellError>>()?; - Ok(values) - } - DataType::Datetime(time_unit, tz) => { - let casted = series.datetime().map_err(|e| ShellError::GenericError { - error: "Error casting column to datetime".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(a) => { - // elapsed time in nano/micro/milliseconds since 1970-01-01 - let nanos = nanos_from_timeunit(a, *time_unit); - let datetime = datetime_from_epoch_nanos(nanos, tz, span)?; - Ok(Value::date(datetime, span)) - } - None => Ok(Value::nothing(span)), - }) - .collect::, ShellError>>()?; - Ok(values) - } - DataType::Struct(polar_fields) => { - let casted = series.struct_().map_err(|e| ShellError::GenericError { - error: "Error casting column to struct".into(), - msg: "".to_string(), - span: None, - help: Some(e.to_string()), - inner: Vec::new(), - })?; - let it = casted.into_iter(); - let values: Result, ShellError> = - if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|any_values| { - let record = polar_fields - .iter() - .zip(any_values) - .map(|(field, val)| { - any_value_to_value(val, span).map(|val| (field.name.to_string(), val)) - }) - .collect::>()?; - - 
Ok(Value::record(record, span)) - }) - .collect(); - values - } - DataType::Time => { - let casted = - series - .timestamp(TimeUnit::Nanoseconds) - .map_err(|e| ShellError::GenericError { - error: "Error casting column to time".into(), - msg: "".into(), - span: None, - help: Some(e.to_string()), - inner: vec![], - })?; - - let it = casted.into_iter(); - let values = if let (Some(size), Some(from_row)) = (maybe_size, maybe_from_row) { - Either::Left(it.skip(from_row).take(size)) - } else { - Either::Right(it) - } - .map(|v| match v { - Some(nanoseconds) => Value::duration(nanoseconds, span), - None => Value::nothing(span), - }) - .collect::>(); - - Ok(values) - } - e => Err(ShellError::GenericError { - error: "Error creating Dataframe".into(), - msg: "".to_string(), - span: None, - help: Some(format!("Value not supported in nushell: {e}")), - inner: vec![], - }), - } -} - -fn any_value_to_value(any_value: &AnyValue, span: Span) -> Result { - match any_value { - AnyValue::Null => Ok(Value::nothing(span)), - AnyValue::Boolean(b) => Ok(Value::bool(*b, span)), - AnyValue::String(s) => Ok(Value::string(s.to_string(), span)), - AnyValue::UInt8(i) => Ok(Value::int(*i as i64, span)), - AnyValue::UInt16(i) => Ok(Value::int(*i as i64, span)), - AnyValue::UInt32(i) => Ok(Value::int(*i as i64, span)), - AnyValue::UInt64(i) => Ok(Value::int(*i as i64, span)), - AnyValue::Int8(i) => Ok(Value::int(*i as i64, span)), - AnyValue::Int16(i) => Ok(Value::int(*i as i64, span)), - AnyValue::Int32(i) => Ok(Value::int(*i as i64, span)), - AnyValue::Int64(i) => Ok(Value::int(*i, span)), - AnyValue::Float32(f) => Ok(Value::float(*f as f64, span)), - AnyValue::Float64(f) => Ok(Value::float(*f, span)), - AnyValue::Date(d) => { - let nanos = nanos_per_day(*d); - datetime_from_epoch_nanos(nanos, &None, span) - .map(|datetime| Value::date(datetime, span)) - } - AnyValue::Datetime(a, time_unit, tz) => { - let nanos = nanos_from_timeunit(*a, *time_unit); - datetime_from_epoch_nanos(nanos, tz, span).map(|datetime| Value::date(datetime, span)) - } - AnyValue::Duration(a, time_unit) => { - let nanos = match time_unit { - TimeUnit::Nanoseconds => *a, - TimeUnit::Microseconds => *a * 1_000, - TimeUnit::Milliseconds => *a * 1_000_000, - }; - Ok(Value::duration(nanos, span)) - } - // AnyValue::Time represents the current time since midnight. - // Unfortunately, there is no timezone related information. - // Given this, calculate the current date from UTC and add the time. - AnyValue::Time(nanos) => time_from_midnight(*nanos, span), - AnyValue::List(series) => { - series_to_values(series, None, None, span).map(|values| Value::list(values, span)) - } - AnyValue::Struct(_idx, _struct_array, _s_fields) => { - // This should convert to a StructOwned object. 
- let static_value = - any_value - .clone() - .into_static() - .map_err(|e| ShellError::GenericError { - error: "Cannot convert polars struct to static value".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: Vec::new(), - })?; - any_value_to_value(&static_value, span) - } - AnyValue::StructOwned(struct_tuple) => { - let record = struct_tuple - .1 - .iter() - .zip(&struct_tuple.0) - .map(|(field, val)| { - any_value_to_value(val, span).map(|val| (field.name.to_string(), val)) - }) - .collect::>()?; - - Ok(Value::record(record, span)) - } - AnyValue::StringOwned(s) => Ok(Value::string(s.to_string(), span)), - AnyValue::Binary(bytes) => Ok(Value::binary(*bytes, span)), - AnyValue::BinaryOwned(bytes) => Ok(Value::binary(bytes.to_owned(), span)), - e => Err(ShellError::GenericError { - error: "Error creating Value".into(), - msg: "".to_string(), - span: None, - help: Some(format!("Value not supported in nushell: {e}")), - inner: Vec::new(), - }), - } -} - -fn nanos_per_day(days: i32) -> i64 { - days as i64 * NANOS_PER_DAY -} - -fn nanos_from_timeunit(a: i64, time_unit: TimeUnit) -> i64 { - a * match time_unit { - TimeUnit::Microseconds => 1_000, // Convert microseconds to nanoseconds - TimeUnit::Milliseconds => 1_000_000, // Convert milliseconds to nanoseconds - TimeUnit::Nanoseconds => 1, // Already in nanoseconds - } -} - -fn datetime_from_epoch_nanos( - nanos: i64, - timezone: &Option, - span: Span, -) -> Result, ShellError> { - let tz: Tz = if let Some(polars_tz) = timezone { - polars_tz - .parse::() - .map_err(|_| ShellError::GenericError { - error: format!("Could not parse polars timezone: {polars_tz}"), - msg: "".to_string(), - span: Some(span), - help: None, - inner: vec![], - })? - } else { - Tz::UTC - }; - - Ok(tz.timestamp_nanos(nanos).fixed_offset()) -} - -fn time_from_midnight(nanos: i64, span: Span) -> Result { - let today = Utc::now().date_naive(); - NaiveTime::from_hms_opt(0, 0, 0) // midnight - .map(|time| time + Duration::nanoseconds(nanos)) // current time - .map(|time| today.and_time(time)) // current date and time - .and_then(|datetime| { - FixedOffset::east_opt(0) // utc - .map(|offset| { - DateTime::::from_naive_utc_and_offset(datetime, offset) - }) - }) - .map(|datetime| Value::date(datetime, span)) // current date and time - .ok_or(ShellError::CantConvert { - to_type: "datetime".to_string(), - from_type: "polars time".to_string(), - span, - help: Some("Could not convert polars time of {nanos} to datetime".to_string()), - }) -} - -#[cfg(test)] -mod tests { - use indexmap::indexmap; - use nu_protocol::record; - use polars::export::arrow::array::{BooleanArray, PrimitiveArray}; - use polars::prelude::Field; - use polars_io::prelude::StructArray; - - use super::*; - - #[test] - fn test_parsed_column_string_list() -> Result<(), Box> { - let values = vec![ - Value::list( - vec![Value::string("bar".to_string(), Span::test_data())], - Span::test_data(), - ), - Value::list( - vec![Value::string("baz".to_string(), Span::test_data())], - Span::test_data(), - ), - ]; - let column = Column { - name: "foo".to_string(), - values: values.clone(), - }; - let typed_column = TypedColumn { - column, - column_type: Some(DataType::List(Box::new(DataType::String))), - }; - - let column_map = indexmap!("foo".to_string() => typed_column); - let parsed_df = from_parsed_columns(column_map)?; - let parsed_columns = parsed_df.columns(Span::test_data())?; - assert_eq!(parsed_columns.len(), 1); - let column = parsed_columns - .first() - .expect("There should be a first value in 
columns"); - assert_eq!(column.name(), "foo"); - assert_eq!(column.values, values); - - Ok(()) - } - - #[test] - fn test_any_value_to_value() -> Result<(), Box> { - let span = Span::test_data(); - assert_eq!( - any_value_to_value(&AnyValue::Null, span)?, - Value::nothing(span) - ); - - let test_bool = true; - assert_eq!( - any_value_to_value(&AnyValue::Boolean(test_bool), span)?, - Value::bool(test_bool, span) - ); - - let test_str = "foo"; - assert_eq!( - any_value_to_value(&AnyValue::String(test_str), span)?, - Value::string(test_str.to_string(), span) - ); - assert_eq!( - any_value_to_value(&AnyValue::StringOwned(test_str.into()), span)?, - Value::string(test_str.to_owned(), span) - ); - - let tests_uint8 = 4; - assert_eq!( - any_value_to_value(&AnyValue::UInt8(tests_uint8), span)?, - Value::int(tests_uint8 as i64, span) - ); - - let tests_uint16 = 233; - assert_eq!( - any_value_to_value(&AnyValue::UInt16(tests_uint16), span)?, - Value::int(tests_uint16 as i64, span) - ); - - let tests_uint32 = 897688233; - assert_eq!( - any_value_to_value(&AnyValue::UInt32(tests_uint32), span)?, - Value::int(tests_uint32 as i64, span) - ); - - let tests_uint64 = 903225135897388233; - assert_eq!( - any_value_to_value(&AnyValue::UInt64(tests_uint64), span)?, - Value::int(tests_uint64 as i64, span) - ); - - let tests_float32 = 903225135897388233.3223353; - assert_eq!( - any_value_to_value(&AnyValue::Float32(tests_float32), span)?, - Value::float(tests_float32 as f64, span) - ); - - let tests_float64 = 9064251358973882322333.64233533232; - assert_eq!( - any_value_to_value(&AnyValue::Float64(tests_float64), span)?, - Value::float(tests_float64, span) - ); - - let test_days = 10_957; - let comparison_date = Utc - .with_ymd_and_hms(2000, 1, 1, 0, 0, 0) - .unwrap() - .fixed_offset(); - assert_eq!( - any_value_to_value(&AnyValue::Date(test_days), span)?, - Value::date(comparison_date, span) - ); - - let test_millis = 946_684_800_000; - assert_eq!( - any_value_to_value( - &AnyValue::Datetime(test_millis, TimeUnit::Milliseconds, &None), - span - )?, - Value::date(comparison_date, span) - ); - - let test_duration_millis = 99_999; - let test_duration_micros = 99_999_000; - let test_duration_nanos = 99_999_000_000; - assert_eq!( - any_value_to_value( - &AnyValue::Duration(test_duration_nanos, TimeUnit::Nanoseconds), - span - )?, - Value::duration(test_duration_nanos, span) - ); - assert_eq!( - any_value_to_value( - &AnyValue::Duration(test_duration_micros, TimeUnit::Microseconds), - span - )?, - Value::duration(test_duration_nanos, span) - ); - assert_eq!( - any_value_to_value( - &AnyValue::Duration(test_duration_millis, TimeUnit::Milliseconds), - span - )?, - Value::duration(test_duration_nanos, span) - ); - - let test_binary = b"sdf2332f32q3f3afwaf3232f32"; - assert_eq!( - any_value_to_value(&AnyValue::Binary(test_binary), span)?, - Value::binary(test_binary.to_vec(), span) - ); - assert_eq!( - any_value_to_value(&AnyValue::BinaryOwned(test_binary.to_vec()), span)?, - Value::binary(test_binary.to_vec(), span) - ); - - let test_time_nanos = 54_000_000_000_000; - let test_time = DateTime::::from_naive_utc_and_offset( - Utc::now() - .date_naive() - .and_time(NaiveTime::from_hms_opt(15, 00, 00).unwrap()), - FixedOffset::east_opt(0).unwrap(), - ); - assert_eq!( - any_value_to_value(&AnyValue::Time(test_time_nanos), span)?, - Value::date(test_time, span) - ); - - let test_list_series = Series::new("int series", &[1, 2, 3]); - let comparison_list_series = Value::list( - vec![ - Value::int(1, span), - Value::int(2, span), 
-                Value::int(3, span),
-            ],
-            span,
-        );
-        assert_eq!(
-            any_value_to_value(&AnyValue::List(test_list_series), span)?,
-            comparison_list_series
-        );
-
-        let field_value_0 = AnyValue::Int32(1);
-        let field_value_1 = AnyValue::Boolean(true);
-        let values = vec![field_value_0, field_value_1];
-        let field_name_0 = "num_field";
-        let field_name_1 = "bool_field";
-        let fields = vec![
-            Field::new(field_name_0, DataType::Int32),
-            Field::new(field_name_1, DataType::Boolean),
-        ];
-        let test_owned_struct = AnyValue::StructOwned(Box::new((values, fields.clone())));
-        let comparison_owned_record = Value::test_record(record!(
-            field_name_0 => Value::int(1, span),
-            field_name_1 => Value::bool(true, span),
-        ));
-        assert_eq!(
-            any_value_to_value(&test_owned_struct, span)?,
-            comparison_owned_record.clone()
-        );
-
-        let test_int_arr = PrimitiveArray::from([Some(1_i32)]);
-        let test_bool_arr = BooleanArray::from([Some(true)]);
-        let test_struct_arr = StructArray::new(
-            DataType::Struct(fields.clone()).to_arrow(true),
-            vec![Box::new(test_int_arr), Box::new(test_bool_arr)],
-            None,
-        );
-        assert_eq!(
-            any_value_to_value(
-                &AnyValue::Struct(0, &test_struct_arr, fields.as_slice()),
-                span
-            )?,
-            comparison_owned_record
-        );
-
-        Ok(())
-    }
-}
diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/custom_value.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/custom_value.rs
deleted file mode 100644
index da8b27398b..0000000000
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/custom_value.rs
+++ /dev/null
@@ -1,79 +0,0 @@
-use super::NuDataFrame;
-use nu_protocol::{ast::Operator, CustomValue, ShellError, Span, Value};
-
-// CustomValue implementation for NuDataFrame
-impl CustomValue for NuDataFrame {
-    fn typetag_name(&self) -> &'static str {
-        "dataframe"
-    }
-
-    fn typetag_deserialize(&self) {
-        unimplemented!("typetag_deserialize")
-    }
-
-    fn clone_value(&self, span: nu_protocol::Span) -> Value {
-        let cloned = NuDataFrame {
-            df: self.df.clone(),
-            from_lazy: false,
-        };
-
-        Value::custom(Box::new(cloned), span)
-    }
-
-    fn type_name(&self) -> String {
-        self.typetag_name().to_string()
-    }
-
-    fn to_base_value(&self, span: Span) -> Result<Value, ShellError> {
-        let vals = self.print(span)?;
-
-        Ok(Value::list(vals, span))
-    }
-
-    fn as_any(&self) -> &dyn std::any::Any {
-        self
-    }
-
-    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
-        self
-    }
-
-    fn follow_path_int(
-        &self,
-        _self_span: Span,
-        count: usize,
-        path_span: Span,
-    ) -> Result<Value, ShellError> {
-        self.get_value(count, path_span)
-    }
-
-    fn follow_path_string(
-        &self,
-        _self_span: Span,
-        column_name: String,
-        path_span: Span,
-    ) -> Result<Value, ShellError> {
-        let column = self.column(&column_name, path_span)?;
-        Ok(column.into_value(path_span))
-    }
-
-    fn partial_cmp(&self, other: &Value) -> Option<std::cmp::Ordering> {
-        match other {
-            Value::Custom { val, .. } => val
-                .as_any()
-                .downcast_ref::<NuDataFrame>()
-                .and_then(|other| self.is_equal(other)),
-            _ => None,
-        }
-    }
-
-    fn operation(
-        &self,
-        lhs_span: Span,
-        operator: Operator,
-        op: Span,
-        right: &Value,
-    ) -> Result<Value, ShellError> {
-        self.compute_with_value(lhs_span, operator, op, right)
-    }
-}
diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs
deleted file mode 100644
index 967e03580f..0000000000
--- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/mod.rs
+++ /dev/null
@@ -1,580 +0,0 @@
-mod between_values;
-mod conversion;
-mod custom_value;
-mod operations;
-
-pub use conversion::{Column, ColumnMap};
-pub use operations::Axis;
-
-use super::{nu_schema::NuSchema, utils::DEFAULT_ROWS, NuLazyFrame};
-use indexmap::IndexMap;
-use nu_protocol::{did_you_mean, PipelineData, Record, ShellError, Span, Value};
-use polars::{
-    chunked_array::ops::SortMultipleOptions,
-    prelude::{DataFrame, DataType, IntoLazy, LazyFrame, PolarsObject, Series},
-};
-use polars_plan::prelude::{lit, Expr, Null};
-use polars_utils::total_ord::{TotalEq, TotalHash};
-use serde::{Deserialize, Serialize};
-use std::{
-    cmp::Ordering,
-    collections::HashSet,
-    fmt::Display,
-    hash::{Hash, Hasher},
-};
-
-// DataFrameValue is an encapsulation of Nushell Value that can be used
-// to define the PolarsObject Trait. The polars object trait allows to
-// create dataframes with mixed datatypes
-#[derive(Clone, Debug)]
-pub struct DataFrameValue(Value);
-
-impl DataFrameValue {
-    fn new(value: Value) -> Self {
-        Self(value)
-    }
-
-    fn get_value(&self) -> Value {
-        self.0.clone()
-    }
-}
-
-impl TotalHash for DataFrameValue {
-    fn tot_hash<H>(&self, state: &mut H)
-    where
-        H: Hasher,
-    {
-        (*self).hash(state)
-    }
-}
-
-impl Display for DataFrameValue {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.0.get_type())
-    }
-}
-
-impl Default for DataFrameValue {
-    fn default() -> Self {
-        Self(Value::nothing(Span::unknown()))
-    }
-}
-
-impl PartialEq for DataFrameValue {
-    fn eq(&self, other: &Self) -> bool {
-        self.0.partial_cmp(&other.0).map_or(false, Ordering::is_eq)
-    }
-}
-impl Eq for DataFrameValue {}
-
-impl Hash for DataFrameValue {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        match &self.0 {
-            Value::Nothing { .. } => 0.hash(state),
-            Value::Int { val, .. } => val.hash(state),
-            Value::String { val, .. } => val.hash(state),
-            // TODO.
Define hash for the rest of types - _ => {} - } - } -} - -impl TotalEq for DataFrameValue { - fn tot_eq(&self, other: &Self) -> bool { - self == other - } -} - -impl PolarsObject for DataFrameValue { - fn type_name() -> &'static str { - "object" - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct NuDataFrame { - pub df: DataFrame, - pub from_lazy: bool, -} - -impl AsRef for NuDataFrame { - fn as_ref(&self) -> &polars::prelude::DataFrame { - &self.df - } -} - -impl AsMut for NuDataFrame { - fn as_mut(&mut self) -> &mut polars::prelude::DataFrame { - &mut self.df - } -} - -impl From for NuDataFrame { - fn from(df: DataFrame) -> Self { - Self { - df, - from_lazy: false, - } - } -} - -impl NuDataFrame { - pub fn new(from_lazy: bool, df: DataFrame) -> Self { - Self { df, from_lazy } - } - - pub fn lazy(&self) -> LazyFrame { - self.df.clone().lazy() - } - - fn default_value(span: Span) -> Value { - let dataframe = DataFrame::default(); - NuDataFrame::dataframe_into_value(dataframe, span) - } - - pub fn dataframe_into_value(dataframe: DataFrame, span: Span) -> Value { - Value::custom(Box::new(Self::new(false, dataframe)), span) - } - - pub fn into_value(self, span: Span) -> Value { - if self.from_lazy { - let lazy = NuLazyFrame::from_dataframe(self); - Value::custom(Box::new(lazy), span) - } else { - Value::custom(Box::new(self), span) - } - } - - pub fn series_to_value(series: Series, span: Span) -> Result { - match DataFrame::new(vec![series]) { - Ok(dataframe) => Ok(NuDataFrame::dataframe_into_value(dataframe, span)), - Err(e) => Err(ShellError::GenericError { - error: "Error creating dataframe".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }), - } - } - - pub fn try_from_iter(iter: T, maybe_schema: Option) -> Result - where - T: Iterator, - { - // Dictionary to store the columnar data extracted from - // the input. During the iteration we check if the values - // have different type - let mut column_values: ColumnMap = IndexMap::new(); - - for value in iter { - match value { - Value::Custom { .. } => return Self::try_from_value(value), - Value::List { vals, .. } => { - let record = vals - .into_iter() - .enumerate() - .map(|(i, val)| (format!("{i}"), val)) - .collect(); - - conversion::insert_record(&mut column_values, record, &maybe_schema)? - } - Value::Record { val: record, .. } => conversion::insert_record( - &mut column_values, - record.into_owned(), - &maybe_schema, - )?, - _ => { - let key = "0".to_string(); - conversion::insert_value(value, key, &mut column_values, &maybe_schema)? 
- } - } - } - - let df = conversion::from_parsed_columns(column_values)?; - add_missing_columns(df, &maybe_schema, Span::unknown()) - } - - pub fn try_from_series(columns: Vec, span: Span) -> Result { - let dataframe = DataFrame::new(columns).map_err(|e| ShellError::GenericError { - error: "Error creating dataframe".into(), - msg: format!("Unable to create DataFrame: {e}"), - span: Some(span), - help: None, - inner: vec![], - })?; - - Ok(Self::new(false, dataframe)) - } - - pub fn try_from_columns( - columns: Vec, - maybe_schema: Option, - ) -> Result { - let mut column_values: ColumnMap = IndexMap::new(); - - for column in columns { - let name = column.name().to_string(); - for value in column { - conversion::insert_value(value, name.clone(), &mut column_values, &maybe_schema)?; - } - } - - let df = conversion::from_parsed_columns(column_values)?; - add_missing_columns(df, &maybe_schema, Span::unknown()) - } - - pub fn fill_list_nan(list: Vec, list_span: Span, fill: Value) -> Value { - let newlist = list - .into_iter() - .map(|value| { - let span = value.span(); - match value { - Value::Float { val, .. } => { - if val.is_nan() { - fill.clone() - } else { - value - } - } - Value::List { vals, .. } => Self::fill_list_nan(vals, span, fill.clone()), - _ => value, - } - }) - .collect::>(); - Value::list(newlist, list_span) - } - - pub fn columns(&self, span: Span) -> Result, ShellError> { - let height = self.df.height(); - self.df - .get_columns() - .iter() - .map(|col| conversion::create_column(col, 0, height, span)) - .collect::, ShellError>>() - } - - pub fn try_from_value(value: Value) -> Result { - if Self::can_downcast(&value) { - Ok(Self::get_df(value)?) - } else if NuLazyFrame::can_downcast(&value) { - let span = value.span(); - let lazy = NuLazyFrame::try_from_value(value)?; - let df = lazy.collect(span)?; - Ok(df) - } else { - Err(ShellError::CantConvert { - to_type: "lazy or eager dataframe".into(), - from_type: value.get_type().to_string(), - span: value.span(), - help: None, - }) - } - } - - pub fn get_df(value: Value) -> Result { - let span = value.span(); - match value { - Value::Custom { val, .. } => match val.as_any().downcast_ref::() { - Some(df) => Ok(NuDataFrame { - df: df.df.clone(), - from_lazy: false, - }), - None => Err(ShellError::CantConvert { - to_type: "dataframe".into(), - from_type: "non-dataframe".into(), - span, - help: None, - }), - }, - x => Err(ShellError::CantConvert { - to_type: "dataframe".into(), - from_type: x.get_type().to_string(), - span: x.span(), - help: None, - }), - } - } - - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span)?; - Self::try_from_value(value) - } - - pub fn can_downcast(value: &Value) -> bool { - if let Value::Custom { val, .. 
} = value { - val.as_any().downcast_ref::().is_some() - } else { - false - } - } - - pub fn column(&self, column: &str, span: Span) -> Result { - let s = self.df.column(column).map_err(|_| { - let possibilities = self - .df - .get_column_names() - .iter() - .map(|name| name.to_string()) - .collect::>(); - - let option = did_you_mean(&possibilities, column).unwrap_or_else(|| column.to_string()); - ShellError::DidYouMean { - suggestion: option, - span, - } - })?; - - let df = DataFrame::new(vec![s.clone()]).map_err(|e| ShellError::GenericError { - error: "Error creating dataframe".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - Ok(Self { - df, - from_lazy: false, - }) - } - - pub fn is_series(&self) -> bool { - self.df.width() == 1 - } - - pub fn as_series(&self, span: Span) -> Result { - if !self.is_series() { - return Err(ShellError::GenericError { - error: "Error using as series".into(), - msg: "dataframe has more than one column".into(), - span: Some(span), - help: None, - inner: vec![], - }); - } - - let series = self - .df - .get_columns() - .first() - .expect("We have already checked that the width is 1"); - - Ok(series.clone()) - } - - pub fn get_value(&self, row: usize, span: Span) -> Result { - let series = self.as_series(span)?; - let column = conversion::create_column(&series, row, row + 1, span)?; - - if column.len() == 0 { - Err(ShellError::AccessEmptyContent { span }) - } else { - let value = column - .into_iter() - .next() - .expect("already checked there is a value"); - Ok(value) - } - } - - // Print is made out a head and if the dataframe is too large, then a tail - pub fn print(&self, span: Span) -> Result, ShellError> { - let df = &self.df; - let size: usize = 20; - - if df.height() > size { - let sample_size = size / 2; - let mut values = self.head(Some(sample_size), span)?; - conversion::add_separator(&mut values, df, span); - let remaining = df.height() - sample_size; - let tail_size = remaining.min(sample_size); - let mut tail_values = self.tail(Some(tail_size), span)?; - values.append(&mut tail_values); - - Ok(values) - } else { - Ok(self.head(Some(size), span)?) 
- } - } - - pub fn height(&self) -> usize { - self.df.height() - } - - pub fn head(&self, rows: Option, span: Span) -> Result, ShellError> { - let to_row = rows.unwrap_or(5); - let values = self.to_rows(0, to_row, span)?; - - Ok(values) - } - - pub fn tail(&self, rows: Option, span: Span) -> Result, ShellError> { - let df = &self.df; - let to_row = df.height(); - let size = rows.unwrap_or(DEFAULT_ROWS); - let from_row = to_row.saturating_sub(size); - - let values = self.to_rows(from_row, to_row, span)?; - - Ok(values) - } - - pub fn to_rows( - &self, - from_row: usize, - to_row: usize, - span: Span, - ) -> Result, ShellError> { - let df = &self.df; - let upper_row = to_row.min(df.height()); - - let mut size: usize = 0; - let columns = self - .df - .get_columns() - .iter() - .map( - |col| match conversion::create_column(col, from_row, upper_row, span) { - Ok(col) => { - size = col.len(); - Ok(col) - } - Err(e) => Err(e), - }, - ) - .collect::, ShellError>>()?; - - let mut iterators = columns - .into_iter() - .map(|col| (col.name().to_string(), col.into_iter())) - .collect::)>>(); - - let values = (0..size) - .map(|i| { - let mut record = Record::new(); - - record.push("index", Value::int((i + from_row) as i64, span)); - - for (name, col) in &mut iterators { - record.push(name.clone(), col.next().unwrap_or(Value::nothing(span))); - } - - Value::record(record, span) - }) - .collect::>(); - - Ok(values) - } - - // Dataframes are considered equal if they have the same shape, column name and values - pub fn is_equal(&self, other: &Self) -> Option { - if self.as_ref().width() == 0 { - // checking for empty dataframe - return None; - } - - if self.as_ref().get_column_names() != other.as_ref().get_column_names() { - // checking both dataframes share the same names - return None; - } - - if self.as_ref().height() != other.as_ref().height() { - // checking both dataframes have the same row size - return None; - } - - // sorting dataframe by the first column - let column_names = self.as_ref().get_column_names(); - let first_col = column_names - .first() - .expect("already checked that dataframe is different than 0"); - - // if unable to sort, then unable to compare - let lhs = match self - .as_ref() - .sort(vec![*first_col], SortMultipleOptions::default()) - { - Ok(df) => df, - Err(_) => return None, - }; - - let rhs = match other - .as_ref() - .sort(vec![*first_col], SortMultipleOptions::default()) - { - Ok(df) => df, - Err(_) => return None, - }; - - for name in self.as_ref().get_column_names() { - let self_series = lhs.column(name).expect("name from dataframe names"); - - let other_series = rhs - .column(name) - .expect("already checked that name in other"); - - let self_series = match self_series.dtype() { - // Casting needed to compare other numeric types with nushell numeric type. - // In nushell we only have i64 integer numeric types and any array created - // with nushell untagged primitives will be of type i64 - DataType::UInt32 | DataType::Int32 => match self_series.cast(&DataType::Int64) { - Ok(series) => series, - Err(_) => return None, - }, - _ => self_series.clone(), - }; - - if !self_series.equals(other_series) { - return None; - } - } - - Some(Ordering::Equal) - } - - pub fn schema(&self) -> NuSchema { - NuSchema::new(self.df.schema()) - } -} - -fn add_missing_columns( - df: NuDataFrame, - maybe_schema: &Option, - span: Span, -) -> Result { - // If there are fields that are in the schema, but not in the dataframe - // add them to the dataframe. 
- if let Some(schema) = maybe_schema { - let fields = df.df.fields(); - let df_field_names: HashSet<&str> = fields.iter().map(|f| f.name().as_str()).collect(); - - let missing: Vec<(&str, &DataType)> = schema - .schema - .iter() - .filter_map(|(name, dtype)| { - let name = name.as_str(); - if !df_field_names.contains(name) { - Some((name, dtype)) - } else { - None - } - }) - .collect(); - - let missing_exprs: Vec = missing - .iter() - .map(|(name, dtype)| lit(Null {}).cast((*dtype).to_owned()).alias(name)) - .collect(); - - let df = if !missing.is_empty() { - let with_columns = df.lazy().with_columns(missing_exprs); - NuLazyFrame::new(true, with_columns).collect(span)? - } else { - df - }; - Ok(df) - } else { - Ok(df) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/operations.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/operations.rs deleted file mode 100644 index ff2f7b7604..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_dataframe/operations.rs +++ /dev/null @@ -1,206 +0,0 @@ -use super::{ - between_values::{between_dataframes, compute_between_series, compute_series_single_value}, - NuDataFrame, -}; -use nu_protocol::{ast::Operator, ShellError, Span, Spanned, Value}; -use polars::prelude::{DataFrame, Series}; - -pub enum Axis { - Row, - Column, -} - -impl NuDataFrame { - pub fn compute_with_value( - &self, - lhs_span: Span, - operator: Operator, - op_span: Span, - right: &Value, - ) -> Result { - let rhs_span = right.span(); - match right { - Value::Custom { val: rhs, .. } => { - let rhs = rhs.as_any().downcast_ref::().ok_or_else(|| { - ShellError::DowncastNotPossible { - msg: "Unable to create dataframe".to_string(), - span: rhs_span, - } - })?; - - match (self.is_series(), rhs.is_series()) { - (true, true) => { - let lhs = &self - .as_series(lhs_span) - .expect("Already checked that is a series"); - let rhs = &rhs - .as_series(rhs_span) - .expect("Already checked that is a series"); - - if lhs.dtype() != rhs.dtype() { - return Err(ShellError::IncompatibleParameters { - left_message: format!("datatype {}", lhs.dtype()), - left_span: lhs_span, - right_message: format!("datatype {}", lhs.dtype()), - right_span: rhs_span, - }); - } - - if lhs.len() != rhs.len() { - return Err(ShellError::IncompatibleParameters { - left_message: format!("len {}", lhs.len()), - left_span: lhs_span, - right_message: format!("len {}", rhs.len()), - right_span: rhs_span, - }); - } - - let op = Spanned { - item: operator, - span: op_span, - }; - - compute_between_series( - op, - &NuDataFrame::default_value(lhs_span), - lhs, - right, - rhs, - ) - } - _ => { - if self.df.height() != rhs.df.height() { - return Err(ShellError::IncompatibleParameters { - left_message: format!("rows {}", self.df.height()), - left_span: lhs_span, - right_message: format!("rows {}", rhs.df.height()), - right_span: rhs_span, - }); - } - - let op = Spanned { - item: operator, - span: op_span, - }; - - between_dataframes( - op, - &NuDataFrame::default_value(lhs_span), - self, - right, - rhs, - ) - } - } - } - _ => { - let op = Spanned { - item: operator, - span: op_span, - }; - - compute_series_single_value(op, &NuDataFrame::default_value(lhs_span), self, right) - } - } - } - - pub fn append_df( - &self, - other: &NuDataFrame, - axis: Axis, - span: Span, - ) -> Result { - match axis { - Axis::Row => { - let mut columns: Vec<&str> = Vec::new(); - - let new_cols = self - .df - .get_columns() - .iter() - .chain(other.df.get_columns()) - .map(|s| { - let name = if 
columns.contains(&s.name()) { - format!("{}_{}", s.name(), "x") - } else { - columns.push(s.name()); - s.name().to_string() - }; - - let mut series = s.clone(); - series.rename(&name); - series - }) - .collect::>(); - - let df_new = DataFrame::new(new_cols).map_err(|e| ShellError::GenericError { - error: "Error creating dataframe".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - })?; - - Ok(NuDataFrame::new(false, df_new)) - } - Axis::Column => { - if self.df.width() != other.df.width() { - return Err(ShellError::IncompatibleParametersSingle { - msg: "Dataframes with different number of columns".into(), - span, - }); - } - - if !self - .df - .get_column_names() - .iter() - .all(|col| other.df.get_column_names().contains(col)) - { - return Err(ShellError::IncompatibleParametersSingle { - msg: "Dataframes with different columns names".into(), - span, - }); - } - - let new_cols = self - .df - .get_columns() - .iter() - .map(|s| { - let other_col = other - .df - .column(s.name()) - .expect("Already checked that dataframes have same columns"); - - let mut tmp = s.clone(); - let res = tmp.append(other_col); - - match res { - Ok(s) => Ok(s.clone()), - Err(e) => Err({ - ShellError::GenericError { - error: "Error appending dataframe".into(), - msg: format!("Unable to append: {e}"), - span: Some(span), - help: None, - inner: vec![], - } - }), - } - }) - .collect::, ShellError>>()?; - - let df_new = DataFrame::new(new_cols).map_err(|e| ShellError::GenericError { - error: "Error appending dataframe".into(), - msg: format!("Unable to append dataframes: {e}"), - span: Some(span), - help: None, - inner: vec![], - })?; - - Ok(NuDataFrame::new(false, df_new)) - } - } - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/custom_value.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/custom_value.rs deleted file mode 100644 index 7a7f59e648..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/custom_value.rs +++ /dev/null @@ -1,147 +0,0 @@ -use super::NuExpression; -use nu_protocol::{ - ast::{Comparison, Math, Operator}, - CustomValue, ShellError, Span, Type, Value, -}; -use polars::prelude::Expr; -use std::ops::{Add, Div, Mul, Rem, Sub}; - -// CustomValue implementation for NuDataFrame -impl CustomValue for NuExpression { - fn typetag_name(&self) -> &'static str { - "expression" - } - - fn typetag_deserialize(&self) { - unimplemented!("typetag_deserialize") - } - - fn clone_value(&self, span: nu_protocol::Span) -> Value { - let cloned = NuExpression(self.0.clone()); - - Value::custom(Box::new(cloned), span) - } - - fn type_name(&self) -> String { - self.typetag_name().to_string() - } - - fn to_base_value(&self, span: Span) -> Result { - self.to_value(span) - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn std::any::Any { - self - } - - fn operation( - &self, - lhs_span: Span, - operator: Operator, - op: Span, - right: &Value, - ) -> Result { - compute_with_value(self, lhs_span, operator, op, right) - } -} - -fn compute_with_value( - left: &NuExpression, - lhs_span: Span, - operator: Operator, - op: Span, - right: &Value, -) -> Result { - let rhs_span = right.span(); - match right { - Value::Custom { val: rhs, .. } => { - let rhs = rhs.as_any().downcast_ref::().ok_or_else(|| { - ShellError::DowncastNotPossible { - msg: "Unable to create expression".into(), - span: rhs_span, - } - })?; - - match rhs.as_ref() { - polars::prelude::Expr::Literal(..) 
=> { - with_operator(operator, left, rhs, lhs_span, right.span(), op) - } - _ => Err(ShellError::TypeMismatch { - err_message: "Only literal expressions or number".into(), - span: right.span(), - }), - } - } - _ => { - let rhs = NuExpression::try_from_value(right.clone())?; - with_operator(operator, left, &rhs, lhs_span, right.span(), op) - } - } -} - -fn with_operator( - operator: Operator, - left: &NuExpression, - right: &NuExpression, - lhs_span: Span, - rhs_span: Span, - op_span: Span, -) -> Result { - match operator { - Operator::Math(Math::Plus) => apply_arithmetic(left, right, lhs_span, Add::add), - Operator::Math(Math::Minus) => apply_arithmetic(left, right, lhs_span, Sub::sub), - Operator::Math(Math::Multiply) => apply_arithmetic(left, right, lhs_span, Mul::mul), - Operator::Math(Math::Divide) => apply_arithmetic(left, right, lhs_span, Div::div), - Operator::Math(Math::Modulo) => apply_arithmetic(left, right, lhs_span, Rem::rem), - Operator::Math(Math::FloorDivision) => apply_arithmetic(left, right, lhs_span, Div::div), - Operator::Comparison(Comparison::Equal) => Ok(left - .clone() - .apply_with_expr(right.clone(), Expr::eq) - .into_value(lhs_span)), - Operator::Comparison(Comparison::NotEqual) => Ok(left - .clone() - .apply_with_expr(right.clone(), Expr::neq) - .into_value(lhs_span)), - Operator::Comparison(Comparison::GreaterThan) => Ok(left - .clone() - .apply_with_expr(right.clone(), Expr::gt) - .into_value(lhs_span)), - Operator::Comparison(Comparison::GreaterThanOrEqual) => Ok(left - .clone() - .apply_with_expr(right.clone(), Expr::gt_eq) - .into_value(lhs_span)), - Operator::Comparison(Comparison::LessThan) => Ok(left - .clone() - .apply_with_expr(right.clone(), Expr::lt) - .into_value(lhs_span)), - Operator::Comparison(Comparison::LessThanOrEqual) => Ok(left - .clone() - .apply_with_expr(right.clone(), Expr::lt_eq) - .into_value(lhs_span)), - _ => Err(ShellError::OperatorMismatch { - op_span, - lhs_ty: Type::Custom(left.typetag_name().into()).to_string(), - lhs_span, - rhs_ty: Type::Custom(right.typetag_name().into()).to_string(), - rhs_span, - }), - } -} - -fn apply_arithmetic( - left: &NuExpression, - right: &NuExpression, - span: Span, - f: F, -) -> Result -where - F: Fn(Expr, Expr) -> Expr, -{ - let expr: NuExpression = f(left.as_ref().clone(), right.as_ref().clone()).into(); - - Ok(expr.into_value(span)) -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs deleted file mode 100644 index cee31d7b53..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_expression/mod.rs +++ /dev/null @@ -1,443 +0,0 @@ -mod custom_value; - -use nu_protocol::{record, PipelineData, ShellError, Span, Value}; -use polars::prelude::{col, AggExpr, Expr, Literal}; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; - -// Polars Expression wrapper for Nushell operations -// Object is behind and Option to allow easy implementation of -// the Deserialize trait -#[derive(Default, Clone, Debug)] -pub struct NuExpression(Option); - -// Mocked serialization of the LazyFrame object -impl Serialize for NuExpression { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_none() - } -} - -// Mocked deserialization of the LazyFrame object -impl<'de> Deserialize<'de> for NuExpression { - fn deserialize(_deserializer: D) -> Result - where - D: Deserializer<'de>, - { - Ok(NuExpression::default()) - } -} - -// Referenced access to the real 
LazyFrame -impl AsRef for NuExpression { - fn as_ref(&self) -> &polars::prelude::Expr { - // The only case when there cannot be an expr is if it is created - // using the default function or if created by deserializing something - self.0.as_ref().expect("there should always be a frame") - } -} - -impl AsMut for NuExpression { - fn as_mut(&mut self) -> &mut polars::prelude::Expr { - // The only case when there cannot be an expr is if it is created - // using the default function or if created by deserializing something - self.0.as_mut().expect("there should always be a frame") - } -} - -impl From for NuExpression { - fn from(expr: Expr) -> Self { - Self(Some(expr)) - } -} - -impl NuExpression { - pub fn into_value(self, span: Span) -> Value { - Value::custom(Box::new(self), span) - } - - pub fn try_from_value(value: Value) -> Result { - let span = value.span(); - match value { - Value::Custom { val, .. } => match val.as_any().downcast_ref::() { - Some(expr) => Ok(NuExpression(expr.0.clone())), - None => Err(ShellError::CantConvert { - to_type: "lazy expression".into(), - from_type: "non-dataframe".into(), - span, - help: None, - }), - }, - Value::String { val, .. } => Ok(val.lit().into()), - Value::Int { val, .. } => Ok(val.lit().into()), - Value::Bool { val, .. } => Ok(val.lit().into()), - Value::Float { val, .. } => Ok(val.lit().into()), - x => Err(ShellError::CantConvert { - to_type: "lazy expression".into(), - from_type: x.get_type().to_string(), - span: x.span(), - help: None, - }), - } - } - - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span)?; - Self::try_from_value(value) - } - - pub fn can_downcast(value: &Value) -> bool { - match value { - Value::Custom { val, .. } => val.as_any().downcast_ref::().is_some(), - Value::List { vals, .. } => vals.iter().all(Self::can_downcast), - Value::String { .. } | Value::Int { .. } | Value::Bool { .. } | Value::Float { .. } => { - true - } - _ => false, - } - } - - pub fn into_polars(self) -> Expr { - self.0.expect("Expression cannot be none to convert") - } - - pub fn apply_with_expr(self, other: NuExpression, f: F) -> Self - where - F: Fn(Expr, Expr) -> Expr, - { - let expr = self.0.expect("Lazy expression must not be empty to apply"); - let other = other.0.expect("Lazy expression must not be empty to apply"); - - f(expr, other).into() - } - - pub fn to_value(&self, span: Span) -> Result { - expr_to_value(self.as_ref(), span) - } - - // Convenient function to extract multiple Expr that could be inside a nushell Value - pub fn extract_exprs(value: Value) -> Result, ShellError> { - ExtractedExpr::extract_exprs(value).map(ExtractedExpr::into_exprs) - } -} - -#[derive(Debug)] -// Enum to represent the parsing of the expressions from Value -enum ExtractedExpr { - Single(Expr), - List(Vec), -} - -impl ExtractedExpr { - fn into_exprs(self) -> Vec { - match self { - Self::Single(expr) => vec![expr], - Self::List(expressions) => expressions - .into_iter() - .flat_map(ExtractedExpr::into_exprs) - .collect(), - } - } - - fn extract_exprs(value: Value) -> Result { - match value { - Value::String { val, .. } => Ok(ExtractedExpr::Single(col(val.as_str()))), - Value::Custom { .. } => NuExpression::try_from_value(value) - .map(NuExpression::into_polars) - .map(ExtractedExpr::Single), - Value::List { vals, .. 
} => vals - .into_iter() - .map(Self::extract_exprs) - .collect::, ShellError>>() - .map(ExtractedExpr::List), - x => Err(ShellError::CantConvert { - to_type: "expression".into(), - from_type: x.get_type().to_string(), - span: x.span(), - help: None, - }), - } - } -} - -pub fn expr_to_value(expr: &Expr, span: Span) -> Result { - match expr { - Expr::Alias(expr, alias) => Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "alias" => Value::string(alias.as_ref(), span), - }, - span, - )), - Expr::Column(name) => Ok(Value::record( - record! { - "expr" => Value::string("column", span), - "value" => Value::string(name.to_string(), span), - }, - span, - )), - Expr::Columns(columns) => { - let value = columns.iter().map(|col| Value::string(col, span)).collect(); - Ok(Value::record( - record! { - "expr" => Value::string("columns", span), - "value" => Value::list(value, span), - }, - span, - )) - } - Expr::Literal(literal) => Ok(Value::record( - record! { - "expr" => Value::string("literal", span), - "value" => Value::string(format!("{literal:?}"), span), - }, - span, - )), - Expr::BinaryExpr { left, op, right } => Ok(Value::record( - record! { - "left" => expr_to_value(left, span)?, - "op" => Value::string(format!("{op:?}"), span), - "right" => expr_to_value(right, span)?, - }, - span, - )), - Expr::Ternary { - predicate, - truthy, - falsy, - } => Ok(Value::record( - record! { - "predicate" => expr_to_value(predicate.as_ref(), span)?, - "truthy" => expr_to_value(truthy.as_ref(), span)?, - "falsy" => expr_to_value(falsy.as_ref(), span)?, - }, - span, - )), - Expr::Agg(agg_expr) => { - let value = match agg_expr { - AggExpr::Min { input: expr, .. } - | AggExpr::Max { input: expr, .. } - | AggExpr::Median(expr) - | AggExpr::NUnique(expr) - | AggExpr::First(expr) - | AggExpr::Last(expr) - | AggExpr::Mean(expr) - | AggExpr::Implode(expr) - | AggExpr::Count(expr, _) - | AggExpr::Sum(expr) - | AggExpr::AggGroups(expr) - | AggExpr::Std(expr, _) - | AggExpr::Var(expr, _) => expr_to_value(expr.as_ref(), span), - AggExpr::Quantile { - expr, - quantile, - interpol, - } => Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "quantile" => expr_to_value(quantile.as_ref(), span)?, - "interpol" => Value::string(format!("{interpol:?}"), span), - }, - span, - )), - }; - - Ok(Value::record( - record! { - "expr" => Value::string("agg", span), - "value" => value?, - }, - span, - )) - } - Expr::Len => Ok(Value::record( - record! { "expr" => Value::string("count", span) }, - span, - )), - Expr::Wildcard => Ok(Value::record( - record! { "expr" => Value::string("wildcard", span) }, - span, - )), - Expr::Explode(expr) => Ok(Value::record( - record! { "expr" => expr_to_value(expr.as_ref(), span)? }, - span, - )), - Expr::KeepName(expr) => Ok(Value::record( - record! { "expr" => expr_to_value(expr.as_ref(), span)? }, - span, - )), - Expr::Nth(i) => Ok(Value::record( - record! { "expr" => Value::int(*i, span) }, - span, - )), - Expr::DtypeColumn(dtypes) => { - let vals = dtypes - .iter() - .map(|d| Value::string(format!("{d}"), span)) - .collect(); - - Ok(Value::list(vals, span)) - } - Expr::Sort { expr, options } => Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "options" => Value::string(format!("{options:?}"), span), - }, - span, - )), - Expr::Cast { - expr, - data_type, - strict, - } => Ok(Value::record( - record! 
{ - "expr" => expr_to_value(expr.as_ref(), span)?, - "dtype" => Value::string(format!("{data_type:?}"), span), - "strict" => Value::bool(*strict, span), - }, - span, - )), - Expr::Gather { - expr, - idx, - returns_scalar: _, - } => Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "idx" => expr_to_value(idx.as_ref(), span)?, - }, - span, - )), - Expr::SortBy { - expr, - by, - sort_options, - } => { - let by: Result, ShellError> = - by.iter().map(|b| expr_to_value(b, span)).collect(); - let descending: Vec = sort_options - .descending - .iter() - .map(|r| Value::bool(*r, span)) - .collect(); - - Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "by" => Value::list(by?, span), - "descending" => Value::list(descending, span), - }, - span, - )) - } - Expr::Filter { input, by } => Ok(Value::record( - record! { - "input" => expr_to_value(input.as_ref(), span)?, - "by" => expr_to_value(by.as_ref(), span)?, - }, - span, - )), - Expr::Slice { - input, - offset, - length, - } => Ok(Value::record( - record! { - "input" => expr_to_value(input.as_ref(), span)?, - "offset" => expr_to_value(offset.as_ref(), span)?, - "length" => expr_to_value(length.as_ref(), span)?, - }, - span, - )), - Expr::Exclude(expr, excluded) => { - let excluded = excluded - .iter() - .map(|e| Value::string(format!("{e:?}"), span)) - .collect(); - - Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "excluded" => Value::list(excluded, span), - }, - span, - )) - } - Expr::RenameAlias { expr, function } => Ok(Value::record( - record! { - "expr" => expr_to_value(expr.as_ref(), span)?, - "function" => Value::string(format!("{function:?}"), span), - }, - span, - )), - Expr::AnonymousFunction { - input, - function, - output_type, - options, - } => { - let input: Result, ShellError> = - input.iter().map(|e| expr_to_value(e, span)).collect(); - Ok(Value::record( - record! { - "input" => Value::list(input?, span), - "function" => Value::string(format!("{function:?}"), span), - "output_type" => Value::string(format!("{output_type:?}"), span), - "options" => Value::string(format!("{options:?}"), span), - }, - span, - )) - } - Expr::Function { - input, - function, - options, - } => { - let input: Result, ShellError> = - input.iter().map(|e| expr_to_value(e, span)).collect(); - Ok(Value::record( - record! { - "input" => Value::list(input?, span), - "function" => Value::string(format!("{function:?}"), span), - "options" => Value::string(format!("{options:?}"), span), - }, - span, - )) - } - Expr::Window { - function, - partition_by, - options, - } => { - let partition_by: Result, ShellError> = partition_by - .iter() - .map(|e| expr_to_value(e, span)) - .collect(); - - Ok(Value::record( - record! { - "function" => expr_to_value(function, span)?, - "partition_by" => Value::list(partition_by?, span), - "options" => Value::string(format!("{options:?}"), span), - }, - span, - )) - } - Expr::SubPlan(_, _) => Err(ShellError::UnsupportedInput { - msg: "Expressions of type SubPlan are not yet supported".to_string(), - input: format!("Expression is {expr:?}"), - msg_span: span, - input_span: Span::unknown(), - }), - // the parameter polars_plan::dsl::selector::Selector is not publicly exposed. - // I am not sure what we can meaningfully do with this at this time. 
- Expr::Selector(_) => Err(ShellError::UnsupportedInput { - msg: "Expressions of type Selector to Nu Values is not yet supported".to_string(), - input: format!("Expression is {expr:?}"), - msg_span: span, - input_span: Span::unknown(), - }), - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/custom_value.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/custom_value.rs deleted file mode 100644 index f747ae4d18..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/custom_value.rs +++ /dev/null @@ -1,50 +0,0 @@ -use super::NuLazyFrame; -use nu_protocol::{record, CustomValue, ShellError, Span, Value}; - -// CustomValue implementation for NuDataFrame -impl CustomValue for NuLazyFrame { - fn typetag_name(&self) -> &'static str { - "lazyframe" - } - - fn typetag_deserialize(&self) { - unimplemented!("typetag_deserialize") - } - - fn clone_value(&self, span: nu_protocol::Span) -> Value { - let cloned = NuLazyFrame { - lazy: self.lazy.clone(), - from_eager: self.from_eager, - schema: self.schema.clone(), - }; - - Value::custom(Box::new(cloned), span) - } - - fn type_name(&self) -> String { - self.typetag_name().to_string() - } - - fn to_base_value(&self, span: Span) -> Result { - let optimized_plan = self - .as_ref() - .describe_optimized_plan() - .unwrap_or_else(|_| "".to_string()); - - Ok(Value::record( - record! { - "plan" => Value::string(self.as_ref().describe_plan(), span), - "optimized_plan" => Value::string(optimized_plan, span), - }, - span, - )) - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn std::any::Any { - self - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs deleted file mode 100644 index 355516d340..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazyframe/mod.rs +++ /dev/null @@ -1,188 +0,0 @@ -mod custom_value; - -use super::{NuDataFrame, NuExpression}; -use core::fmt; -use nu_protocol::{PipelineData, ShellError, Span, Value}; -use polars::prelude::{Expr, IntoLazy, LazyFrame, Schema}; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; - -// Lazyframe wrapper for Nushell operations -// Polars LazyFrame is behind and Option to allow easy implementation of -// the Deserialize trait -#[derive(Default)] -pub struct NuLazyFrame { - pub lazy: Option, - pub schema: Option, - pub from_eager: bool, -} - -// Mocked serialization of the LazyFrame object -impl Serialize for NuLazyFrame { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_none() - } -} - -// Mocked deserialization of the LazyFrame object -impl<'de> Deserialize<'de> for NuLazyFrame { - fn deserialize(_deserializer: D) -> Result - where - D: Deserializer<'de>, - { - Ok(NuLazyFrame::default()) - } -} - -impl fmt::Debug for NuLazyFrame { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "NuLazyframe") - } -} - -// Referenced access to the real LazyFrame -impl AsRef for NuLazyFrame { - fn as_ref(&self) -> &polars::prelude::LazyFrame { - // The only case when there cannot be a lazy frame is if it is created - // using the default function or if created by deserializing something - self.lazy.as_ref().expect("there should always be a frame") - } -} - -impl AsMut for NuLazyFrame { - fn as_mut(&mut self) -> &mut polars::prelude::LazyFrame { - // The only case when there cannot be a lazy frame is if it is created - // using 
the default function or if created by deserializing something - self.lazy.as_mut().expect("there should always be a frame") - } -} - -impl From for NuLazyFrame { - fn from(lazy_frame: LazyFrame) -> Self { - Self { - lazy: Some(lazy_frame), - from_eager: false, - schema: None, - } - } -} - -impl NuLazyFrame { - pub fn new(from_eager: bool, lazy: LazyFrame) -> Self { - Self { - lazy: Some(lazy), - from_eager, - schema: None, - } - } - - pub fn from_dataframe(df: NuDataFrame) -> Self { - let lazy = df.as_ref().clone().lazy(); - Self { - lazy: Some(lazy), - from_eager: true, - schema: Some(df.as_ref().schema()), - } - } - - pub fn into_value(self, span: Span) -> Result { - if self.from_eager { - let df = self.collect(span)?; - Ok(Value::custom(Box::new(df), span)) - } else { - Ok(Value::custom(Box::new(self), span)) - } - } - - pub fn into_polars(self) -> LazyFrame { - self.lazy.expect("lazyframe cannot be none to convert") - } - - pub fn collect(self, span: Span) -> Result { - self.lazy - .expect("No empty lazy for collect") - .collect() - .map_err(|e| ShellError::GenericError { - error: "Error collecting lazy frame".into(), - msg: e.to_string(), - span: Some(span), - help: None, - inner: vec![], - }) - .map(|df| NuDataFrame { - df, - from_lazy: !self.from_eager, - }) - } - - pub fn try_from_value(value: Value) -> Result { - if Self::can_downcast(&value) { - Ok(Self::get_lazy_df(value)?) - } else if NuDataFrame::can_downcast(&value) { - let df = NuDataFrame::try_from_value(value)?; - Ok(NuLazyFrame::from_dataframe(df)) - } else { - Err(ShellError::CantConvert { - to_type: "lazy or eager dataframe".into(), - from_type: value.get_type().to_string(), - span: value.span(), - help: None, - }) - } - } - - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span)?; - Self::try_from_value(value) - } - - pub fn get_lazy_df(value: Value) -> Result { - let span = value.span(); - match value { - Value::Custom { val, .. } => match val.as_any().downcast_ref::() { - Some(expr) => Ok(Self { - lazy: expr.lazy.clone(), - from_eager: false, - schema: None, - }), - None => Err(ShellError::CantConvert { - to_type: "lazy frame".into(), - from_type: "non-dataframe".into(), - span, - help: None, - }), - }, - x => Err(ShellError::CantConvert { - to_type: "lazy frame".into(), - from_type: x.get_type().to_string(), - span: x.span(), - help: None, - }), - } - } - - pub fn can_downcast(value: &Value) -> bool { - if let Value::Custom { val, .. 
} = value { - val.as_any().downcast_ref::().is_some() - } else { - false - } - } - - pub fn apply_with_expr(self, expr: NuExpression, f: F) -> Self - where - F: Fn(LazyFrame, Expr) -> LazyFrame, - { - let df = self.lazy.expect("Lazy frame must not be empty to apply"); - let expr = expr.into_polars(); - let new_frame = f(df, expr); - - Self { - from_eager: self.from_eager, - lazy: Some(new_frame), - schema: None, - } - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/custom_value.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/custom_value.rs deleted file mode 100644 index 6ac6cc6046..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/custom_value.rs +++ /dev/null @@ -1,44 +0,0 @@ -use super::NuLazyGroupBy; -use nu_protocol::{record, CustomValue, ShellError, Span, Value}; - -// CustomValue implementation for NuDataFrame -impl CustomValue for NuLazyGroupBy { - fn typetag_name(&self) -> &'static str { - "lazygroupby" - } - - fn typetag_deserialize(&self) { - unimplemented!("typetag_deserialize") - } - - fn clone_value(&self, span: nu_protocol::Span) -> Value { - let cloned = NuLazyGroupBy { - group_by: self.group_by.clone(), - schema: self.schema.clone(), - from_eager: self.from_eager, - }; - - Value::custom(Box::new(cloned), span) - } - - fn type_name(&self) -> String { - self.typetag_name().to_string() - } - - fn to_base_value(&self, span: Span) -> Result { - Ok(Value::record( - record! { - "LazyGroupBy" => Value::string("apply aggregation to complete execution plan", span) - }, - span, - )) - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn std::any::Any { - self - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs deleted file mode 100644 index e1bcb30069..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_lazygroupby/mod.rs +++ /dev/null @@ -1,113 +0,0 @@ -mod custom_value; - -use core::fmt; -use nu_protocol::{PipelineData, ShellError, Span, Value}; -use polars::prelude::{LazyGroupBy, Schema}; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; - -// Lazyframe wrapper for Nushell operations -// Polars LazyFrame is behind and Option to allow easy implementation of -// the Deserialize trait -#[derive(Default)] -pub struct NuLazyGroupBy { - pub group_by: Option, - pub schema: Option, - pub from_eager: bool, -} - -// Mocked serialization of the LazyFrame object -impl Serialize for NuLazyGroupBy { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_none() - } -} - -// Mocked deserialization of the LazyFrame object -impl<'de> Deserialize<'de> for NuLazyGroupBy { - fn deserialize(_deserializer: D) -> Result - where - D: Deserializer<'de>, - { - Ok(NuLazyGroupBy::default()) - } -} - -impl fmt::Debug for NuLazyGroupBy { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "NuLazyGroupBy") - } -} - -// Referenced access to the real LazyFrame -impl AsRef for NuLazyGroupBy { - fn as_ref(&self) -> &polars::prelude::LazyGroupBy { - // The only case when there cannot be a lazy frame is if it is created - // using the default function or if created by deserializing something - self.group_by - .as_ref() - .expect("there should always be a frame") - } -} - -impl AsMut for NuLazyGroupBy { - fn as_mut(&mut self) -> &mut polars::prelude::LazyGroupBy { - // The only case when there cannot be a 
lazy frame is if it is created - // using the default function or if created by deserializing something - self.group_by - .as_mut() - .expect("there should always be a frame") - } -} - -impl From for NuLazyGroupBy { - fn from(group_by: LazyGroupBy) -> Self { - Self { - group_by: Some(group_by), - from_eager: false, - schema: None, - } - } -} - -impl NuLazyGroupBy { - pub fn into_value(self, span: Span) -> Value { - Value::custom(Box::new(self), span) - } - - pub fn into_polars(self) -> LazyGroupBy { - self.group_by.expect("GroupBy cannot be none to convert") - } - - pub fn try_from_value(value: Value) -> Result { - let span = value.span(); - match value { - Value::Custom { val, .. } => match val.as_any().downcast_ref::() { - Some(group) => Ok(Self { - group_by: group.group_by.clone(), - schema: group.schema.clone(), - from_eager: group.from_eager, - }), - None => Err(ShellError::CantConvert { - to_type: "lazy groupby".into(), - from_type: "custom value".into(), - span, - help: None, - }), - }, - x => Err(ShellError::CantConvert { - to_type: "lazy groupby".into(), - from_type: x.get_type().to_string(), - span: x.span(), - help: None, - }), - } - } - - pub fn try_from_pipeline(input: PipelineData, span: Span) -> Result { - let value = input.into_value(span)?; - Self::try_from_value(value) - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs deleted file mode 100644 index 3c2f689b85..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_schema.rs +++ /dev/null @@ -1,376 +0,0 @@ -use nu_protocol::{ShellError, Span, Value}; -use polars::prelude::{DataType, Field, Schema, SchemaRef, TimeUnit}; -use std::sync::Arc; - -#[derive(Debug, Clone)] -pub struct NuSchema { - pub schema: SchemaRef, -} - -impl NuSchema { - pub fn new(schema: Schema) -> Self { - Self { - schema: Arc::new(schema), - } - } -} - -impl TryFrom<&Value> for NuSchema { - type Error = ShellError; - fn try_from(value: &Value) -> Result { - let schema = value_to_schema(value, Span::unknown())?; - Ok(Self::new(schema)) - } -} - -impl From for Value { - fn from(schema: NuSchema) -> Self { - fields_to_value(schema.schema.iter_fields(), Span::unknown()) - } -} - -impl From for SchemaRef { - fn from(val: NuSchema) -> Self { - Arc::clone(&val.schema) - } -} - -fn fields_to_value(fields: impl Iterator, span: Span) -> Value { - let record = fields - .map(|field| { - let col = field.name().to_string(); - let val = dtype_to_value(field.data_type(), span); - (col, val) - }) - .collect(); - - Value::record(record, Span::unknown()) -} - -fn dtype_to_value(dtype: &DataType, span: Span) -> Value { - match dtype { - DataType::Struct(fields) => fields_to_value(fields.iter().cloned(), span), - _ => Value::string(dtype.to_string().replace('[', "<").replace(']', ">"), span), - } -} - -fn value_to_schema(value: &Value, span: Span) -> Result { - let fields = value_to_fields(value, span)?; - let schema = Schema::from_iter(fields); - Ok(schema) -} - -fn value_to_fields(value: &Value, span: Span) -> Result, ShellError> { - let fields = value - .as_record()? - .into_iter() - .map(|(col, val)| match val { - Value::Record { .. 
} => { - let fields = value_to_fields(val, span)?; - let dtype = DataType::Struct(fields); - Ok(Field::new(col, dtype)) - } - _ => { - let dtype = str_to_dtype(&val.coerce_string()?, span)?; - Ok(Field::new(col, dtype)) - } - }) - .collect::, ShellError>>()?; - Ok(fields) -} - -pub fn str_to_dtype(dtype: &str, span: Span) -> Result { - match dtype { - "bool" => Ok(DataType::Boolean), - "u8" => Ok(DataType::UInt8), - "u16" => Ok(DataType::UInt16), - "u32" => Ok(DataType::UInt32), - "u64" => Ok(DataType::UInt64), - "i8" => Ok(DataType::Int8), - "i16" => Ok(DataType::Int16), - "i32" => Ok(DataType::Int32), - "i64" => Ok(DataType::Int64), - "f32" => Ok(DataType::Float32), - "f64" => Ok(DataType::Float64), - "str" => Ok(DataType::String), - "binary" => Ok(DataType::Binary), - "date" => Ok(DataType::Date), - "time" => Ok(DataType::Time), - "null" => Ok(DataType::Null), - "unknown" => Ok(DataType::Unknown), - "object" => Ok(DataType::Object("unknown", None)), - _ if dtype.starts_with("list") => { - let dtype = dtype - .trim_start_matches("list") - .trim_start_matches('<') - .trim_end_matches('>') - .trim(); - let dtype = str_to_dtype(dtype, span)?; - Ok(DataType::List(Box::new(dtype))) - } - _ if dtype.starts_with("datetime") => { - let dtype = dtype - .trim_start_matches("datetime") - .trim_start_matches('<') - .trim_end_matches('>'); - let mut split = dtype.split(','); - let next = split - .next() - .ok_or_else(|| ShellError::GenericError { - error: "Invalid polars data type".into(), - msg: "Missing time unit".into(), - span: Some(span), - help: None, - inner: vec![], - })? - .trim(); - let time_unit = str_to_time_unit(next, span)?; - let next = split - .next() - .ok_or_else(|| ShellError::GenericError { - error: "Invalid polars data type".into(), - msg: "Missing time zone".into(), - span: Some(span), - help: None, - inner: vec![], - })? - .trim(); - let timezone = if "*" == next { - None - } else { - Some(next.to_string()) - }; - Ok(DataType::Datetime(time_unit, timezone)) - } - _ if dtype.starts_with("duration") => { - let inner = dtype.trim_start_matches("duration<").trim_end_matches('>'); - let next = inner - .split(',') - .next() - .ok_or_else(|| ShellError::GenericError { - error: "Invalid polars data type".into(), - msg: "Missing time unit".into(), - span: Some(span), - help: None, - inner: vec![], - })? - .trim(); - let time_unit = str_to_time_unit(next, span)?; - Ok(DataType::Duration(time_unit)) - } - _ => Err(ShellError::GenericError { - error: "Invalid polars data type".into(), - msg: format!("Unknown type: {dtype}"), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -fn str_to_time_unit(ts_string: &str, span: Span) -> Result { - match ts_string { - "ms" => Ok(TimeUnit::Milliseconds), - "us" | "μs" => Ok(TimeUnit::Microseconds), - "ns" => Ok(TimeUnit::Nanoseconds), - _ => Err(ShellError::GenericError { - error: "Invalid polars data type".into(), - msg: "Invalid time unit".into(), - span: Some(span), - help: None, - inner: vec![], - }), - } -} - -#[cfg(test)] -mod test { - - use nu_protocol::record; - - use super::*; - - #[test] - fn test_value_to_schema() { - let address = record! { - "street" => Value::test_string("str"), - "city" => Value::test_string("str"), - }; - - let value = Value::test_record(record! 
{ - "name" => Value::test_string("str"), - "age" => Value::test_string("i32"), - "address" => Value::test_record(address) - }); - - let schema = value_to_schema(&value, Span::unknown()).unwrap(); - let expected = Schema::from_iter(vec![ - Field::new("name", DataType::String), - Field::new("age", DataType::Int32), - Field::new( - "address", - DataType::Struct(vec![ - Field::new("street", DataType::String), - Field::new("city", DataType::String), - ]), - ), - ]); - assert_eq!(schema, expected); - } - - #[test] - fn test_dtype_str_to_schema_simple_types() { - let dtype = "bool"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Boolean; - assert_eq!(schema, expected); - - let dtype = "u8"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::UInt8; - assert_eq!(schema, expected); - - let dtype = "u16"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::UInt16; - assert_eq!(schema, expected); - - let dtype = "u32"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::UInt32; - assert_eq!(schema, expected); - - let dtype = "u64"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::UInt64; - assert_eq!(schema, expected); - - let dtype = "i8"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Int8; - assert_eq!(schema, expected); - - let dtype = "i16"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Int16; - assert_eq!(schema, expected); - - let dtype = "i32"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Int32; - assert_eq!(schema, expected); - - let dtype = "i64"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Int64; - assert_eq!(schema, expected); - - let dtype = "str"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::String; - assert_eq!(schema, expected); - - let dtype = "binary"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Binary; - assert_eq!(schema, expected); - - let dtype = "date"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Date; - assert_eq!(schema, expected); - - let dtype = "time"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Time; - assert_eq!(schema, expected); - - let dtype = "null"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Null; - assert_eq!(schema, expected); - - let dtype = "unknown"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Unknown; - assert_eq!(schema, expected); - - let dtype = "object"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Object("unknown", None); - assert_eq!(schema, expected); - } - - #[test] - fn test_dtype_str_schema_datetime() { - let dtype = "datetime"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Datetime(TimeUnit::Milliseconds, None); - assert_eq!(schema, expected); - - let dtype = "datetime"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Datetime(TimeUnit::Microseconds, None); - assert_eq!(schema, expected); - - let dtype = "datetime<μs, *>"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected 
= DataType::Datetime(TimeUnit::Microseconds, None); - assert_eq!(schema, expected); - - let dtype = "datetime"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Datetime(TimeUnit::Nanoseconds, None); - assert_eq!(schema, expected); - - let dtype = "datetime"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Datetime(TimeUnit::Milliseconds, Some("UTC".into())); - assert_eq!(schema, expected); - - let dtype = "invalid"; - let schema = str_to_dtype(dtype, Span::unknown()); - assert!(schema.is_err()) - } - - #[test] - fn test_dtype_str_schema_duration() { - let dtype = "duration"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Duration(TimeUnit::Milliseconds); - assert_eq!(schema, expected); - - let dtype = "duration"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Duration(TimeUnit::Microseconds); - assert_eq!(schema, expected); - - let dtype = "duration<μs>"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Duration(TimeUnit::Microseconds); - assert_eq!(schema, expected); - - let dtype = "duration"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::Duration(TimeUnit::Nanoseconds); - assert_eq!(schema, expected); - } - - #[test] - fn test_dtype_str_to_schema_list_types() { - let dtype = "list"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::List(Box::new(DataType::Int32)); - assert_eq!(schema, expected); - - let dtype = "list>"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::List(Box::new(DataType::Duration(TimeUnit::Milliseconds))); - assert_eq!(schema, expected); - - let dtype = "list>"; - let schema = str_to_dtype(dtype, Span::unknown()).unwrap(); - let expected = DataType::List(Box::new(DataType::Datetime(TimeUnit::Milliseconds, None))); - assert_eq!(schema, expected); - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_when/custom_value.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_when/custom_value.rs deleted file mode 100644 index e2b73bcef1..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_when/custom_value.rs +++ /dev/null @@ -1,41 +0,0 @@ -use super::NuWhen; -use nu_protocol::{CustomValue, ShellError, Span, Value}; - -// CustomValue implementation for NuDataFrame -impl CustomValue for NuWhen { - fn typetag_name(&self) -> &'static str { - "when" - } - - fn typetag_deserialize(&self) { - unimplemented!("typetag_deserialize") - } - - fn clone_value(&self, span: nu_protocol::Span) -> Value { - let cloned = self.clone(); - - Value::custom(Box::new(cloned), span) - } - - fn type_name(&self) -> String { - self.typetag_name().to_string() - } - - fn to_base_value(&self, span: Span) -> Result { - let val: String = match self { - NuWhen::Then(_) => "whenthen".into(), - NuWhen::ChainedThen(_) => "whenthenthen".into(), - }; - - let value = Value::string(val, span); - Ok(value) - } - - fn as_any(&self) -> &dyn std::any::Any { - self - } - - fn as_mut_any(&mut self) -> &mut dyn std::any::Any { - self - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/nu_when/mod.rs b/crates/nu-cmd-dataframe/src/dataframe/values/nu_when/mod.rs deleted file mode 100644 index b33cde7483..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/nu_when/mod.rs +++ /dev/null @@ -1,77 +0,0 @@ -mod custom_value; - -use core::fmt; -use 
nu_protocol::{ShellError, Span, Value}; -use polars::prelude::{col, when, ChainedThen, Then}; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; - -#[derive(Clone)] -pub enum NuWhen { - Then(Box), - ChainedThen(ChainedThen), -} - -// Mocked serialization of the LazyFrame object -impl Serialize for NuWhen { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_none() - } -} - -// Mocked deserialization of the LazyFrame object -impl<'de> Deserialize<'de> for NuWhen { - fn deserialize(_deserializer: D) -> Result - where - D: Deserializer<'de>, - { - Ok(NuWhen::Then(Box::new(when(col("a")).then(col("b"))))) - } -} - -impl fmt::Debug for NuWhen { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "NuWhen") - } -} - -impl From for NuWhen { - fn from(then: Then) -> Self { - NuWhen::Then(Box::new(then)) - } -} - -impl From for NuWhen { - fn from(chained_when: ChainedThen) -> Self { - NuWhen::ChainedThen(chained_when) - } -} - -impl NuWhen { - pub fn into_value(self, span: Span) -> Value { - Value::custom(Box::new(self), span) - } - - pub fn try_from_value(value: Value) -> Result { - let span = value.span(); - match value { - Value::Custom { val, .. } => match val.as_any().downcast_ref::() { - Some(expr) => Ok(expr.clone()), - None => Err(ShellError::CantConvert { - to_type: "when expression".into(), - from_type: "non when expression".into(), - span, - help: None, - }), - }, - x => Err(ShellError::CantConvert { - to_type: "when expression".into(), - from_type: x.get_type().to_string(), - span: x.span(), - help: None, - }), - } - } -} diff --git a/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs b/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs deleted file mode 100644 index 0dc43399a3..0000000000 --- a/crates/nu-cmd-dataframe/src/dataframe/values/utils.rs +++ /dev/null @@ -1,86 +0,0 @@ -use nu_protocol::{ShellError, Span, Spanned, Value}; - -// Default value used when selecting rows from dataframe -pub const DEFAULT_ROWS: usize = 5; - -// Converts a Vec to a Vec> with a Span marking the whole -// location of the columns for error referencing -pub(crate) fn convert_columns( - columns: Vec, - span: Span, -) -> Result<(Vec>, Span), ShellError> { - // First column span - let mut col_span = columns - .first() - .ok_or_else(|| ShellError::GenericError { - error: "Empty column list".into(), - msg: "Empty list found for command".into(), - span: Some(span), - help: None, - inner: vec![], - }) - .map(|v| v.span())?; - - let res = columns - .into_iter() - .map(|value| { - let span = value.span(); - match value { - Value::String { val, .. 
} => { - col_span = col_span.merge(span); - Ok(Spanned { item: val, span }) - } - _ => Err(ShellError::GenericError { - error: "Incorrect column format".into(), - msg: "Only string as column name".into(), - span: Some(span), - help: None, - inner: vec![], - }), - } - }) - .collect::>, _>>()?; - - Ok((res, col_span)) -} - -// Converts a Vec to a Vec with a Span marking the whole -// location of the columns for error referencing -pub(crate) fn convert_columns_string( - columns: Vec, - span: Span, -) -> Result<(Vec, Span), ShellError> { - // First column span - let mut col_span = columns - .first() - .ok_or_else(|| ShellError::GenericError { - error: "Empty column list".into(), - msg: "Empty list found for command".into(), - span: Some(span), - help: None, - inner: vec![], - }) - .map(|v| v.span())?; - - let res = columns - .into_iter() - .map(|value| { - let span = value.span(); - match value { - Value::String { val, .. } => { - col_span = col_span.merge(span); - Ok(val) - } - _ => Err(ShellError::GenericError { - error: "Incorrect column format".into(), - msg: "Only string as column name".into(), - span: Some(span), - help: None, - inner: vec![], - }), - } - }) - .collect::, _>>()?; - - Ok((res, col_span)) -} diff --git a/crates/nu-cmd-dataframe/src/lib.rs b/crates/nu-cmd-dataframe/src/lib.rs deleted file mode 100644 index 7e7c8014c6..0000000000 --- a/crates/nu-cmd-dataframe/src/lib.rs +++ /dev/null @@ -1,4 +0,0 @@ -#[cfg(feature = "dataframe")] -pub mod dataframe; -#[cfg(feature = "dataframe")] -pub use dataframe::*; diff --git a/crates/nu-cmd-lang/Cargo.toml b/crates/nu-cmd-lang/Cargo.toml index 7592d4303b..abb89a6991 100644 --- a/crates/nu-cmd-lang/Cargo.toml +++ b/crates/nu-cmd-lang/Cargo.toml @@ -28,6 +28,5 @@ mimalloc = [] which-support = [] trash-support = [] sqlite = [] -dataframe = [] static-link-openssl = [] system-clipboard = [] diff --git a/crates/nu-cmd-lang/README.md b/crates/nu-cmd-lang/README.md index 836fabf6f8..6c7298c17a 100644 --- a/crates/nu-cmd-lang/README.md +++ b/crates/nu-cmd-lang/README.md @@ -8,7 +8,6 @@ top of including: * nu-command * nu-cli -* nu-cmd-dataframe * nu-cmd-extra As time goes on and the nu language develops further in parallel with nushell we will be adding other command crates to the system. 
diff --git a/crates/nu-cmd-lang/src/core_commands/version.rs b/crates/nu-cmd-lang/src/core_commands/version.rs index 7aaa72b38a..22ef1f2a08 100644 --- a/crates/nu-cmd-lang/src/core_commands/version.rs +++ b/crates/nu-cmd-lang/src/core_commands/version.rs @@ -175,11 +175,6 @@ fn features_enabled() -> Vec { names.push("sqlite".to_string()); } - #[cfg(feature = "dataframe")] - { - names.push("dataframe".to_string()); - } - #[cfg(feature = "static-link-openssl")] { names.push("static-link-openssl".to_string()); diff --git a/crates/nu-command/tests/commands/open.rs b/crates/nu-command/tests/commands/open.rs index f857718c72..ac5e2f99e5 100644 --- a/crates/nu-command/tests/commands/open.rs +++ b/crates/nu-command/tests/commands/open.rs @@ -238,22 +238,6 @@ fn parses_xml() { assert_eq!(actual.out, "https://www.jntrnr.com/off-to-new-adventures/") } -#[cfg(feature = "dataframe")] -#[test] -fn parses_arrow_ipc() { - let actual = nu!( - cwd: "tests/fixtures/formats", pipeline( - " - dfr open caco3_plastics.arrow - | dfr into-nu - | first - | get origin - " - )); - - assert_eq!(actual.out, "SPAIN") -} - #[test] fn errors_if_file_not_found() { let actual = nu!( diff --git a/crates/nu-command/tests/main.rs b/crates/nu-command/tests/main.rs index 72ccee3d85..1039e1a0d7 100644 --- a/crates/nu-command/tests/main.rs +++ b/crates/nu-command/tests/main.rs @@ -151,9 +151,8 @@ fn commands_declare_input_output_types() { let sig_name = cmd.signature().name; let category = cmd.signature().category; - if matches!(category, Category::Removed | Category::Custom(_)) { + if matches!(category, Category::Removed) { // Deprecated/Removed commands don't have to conform - // TODO: also upgrade the `--features dataframe` commands continue; } diff --git a/crates/nu-protocol/src/errors/shell_error.rs b/crates/nu-protocol/src/errors/shell_error.rs index 81139d1a52..a8ca52142c 100644 --- a/crates/nu-protocol/src/errors/shell_error.rs +++ b/crates/nu-protocol/src/errors/shell_error.rs @@ -1042,19 +1042,6 @@ pub enum ShellError { span: Span, }, - /// A custom value could not be converted to a Dataframe. - /// - /// ## Resolution - /// - /// Make sure conversion to a Dataframe is possible for this value or convert it to a type that does, first. - #[error("Casting error")] - #[diagnostic(code(nu::shell::downcast_not_possible))] - DowncastNotPossible { - msg: String, - #[label("{msg}")] - span: Span, - }, - /// The value given for this configuration is not supported. /// /// ## Resolution diff --git a/crates/nu_plugin_polars/Cargo.toml b/crates/nu_plugin_polars/Cargo.toml index 99c95f33e4..df30c71c52 100644 --- a/crates/nu_plugin_polars/Cargo.toml +++ b/crates/nu_plugin_polars/Cargo.toml @@ -4,7 +4,7 @@ description = "Nushell dataframe plugin commands based on polars." edition = "2021" license = "MIT" name = "nu_plugin_polars" -repository = "https://github.com/nushell/nushell/tree/main/crates/nu-cmd-dataframe" +repository = "https://github.com/nushell/nushell/tree/main/crates/nu_plugin_polars" version = "0.93.1" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/devdocs/PLATFORM_SUPPORT.md b/devdocs/PLATFORM_SUPPORT.md index 46efdfbe5c..dd393ec897 100644 --- a/devdocs/PLATFORM_SUPPORT.md +++ b/devdocs/PLATFORM_SUPPORT.md @@ -33,12 +33,8 @@ We will try to provide builds for all of them but a standard configuration for x We have features of Nushell behind flags that can be passed at compilation time. 
-The design focus of Nushell is primarily expressed by everything accessible without passing additional feature flag. This provides a standard command set and receives the most attention.
-
-One option feature flag is currently tested in CI but contains a feature that may be moved to a plugin:
-- `dataframe`
-  - This includes dataframe support via `polars` and `arrow2`. Introduces a significant additional compilation and binary size.
-  - Due to the use of SIMD extensions may not be compatible with every minimal architecture.
+The design focus of Nushell is primarily expressed by everything accessible without passing additional feature flag.
+This provides a standard command set and receives the most attention.
 
 ## Passively supported platforms
 
diff --git a/scripts/build-all-maclin.sh b/scripts/build-all-maclin.sh
index 5490695573..4658161874 100755
--- a/scripts/build-all-maclin.sh
+++ b/scripts/build-all-maclin.sh
@@ -6,7 +6,7 @@ DIR=$(readlink -f $(dirname "${BASH_SOURCE[0]}"))
 REPO_ROOT=$(dirname $DIR)
 
 echo "---------------------------------------------------------------"
-echo "Building nushell (nu) with dataframes and all the plugins"
+echo "Building nushell (nu) and all the plugins"
 echo "---------------------------------------------------------------"
 echo ""
 
@@ -21,7 +21,7 @@ NU_PLUGINS=(
 echo "Building nushell"
 (
     cd $REPO_ROOT
-    cargo build --features=dataframe --locked
+    cargo build --locked
 )
 
 for plugin in "${NU_PLUGINS[@]}"
diff --git a/scripts/build-all-windows.cmd b/scripts/build-all-windows.cmd
index 2619a294d0..b8a4c70ea2 100644
--- a/scripts/build-all-windows.cmd
+++ b/scripts/build-all-windows.cmd
@@ -1,11 +1,11 @@
 @echo off
 echo -------------------------------------------------------------------
-echo Building nushell (nu.exe) with dataframes and all the plugins
+echo Building nushell (nu.exe) and all the plugins
 echo -------------------------------------------------------------------
 echo.
 
 echo Building nushell.exe
-cargo build --features=dataframe --locked
+cargo build --locked
 echo.
 
 call :build crates\nu_plugin_example nu_plugin_example.exe
diff --git a/scripts/build-all.nu b/scripts/build-all.nu
index 2ad7ec467e..92fcc64635 100644
--- a/scripts/build-all.nu
+++ b/scripts/build-all.nu
@@ -1,7 +1,7 @@
 use std log warning
 
 print '-------------------------------------------------------------------'
-print 'Building nushell (nu) with dataframes and all the plugins'
+print 'Building nushell (nu) and all the plugins'
 print '-------------------------------------------------------------------'
 
 warning "./scripts/build-all.nu will be deprecated, please use the `toolkit build` command instead"
@@ -13,7 +13,7 @@ def build-nushell [] {
     print '----------------------------'
 
     cd $repo_root
-    cargo build --features=dataframe --locked
+    cargo build --locked
 }
 
 def build-plugin [] {
diff --git a/scripts/install-all.ps1 b/scripts/install-all.ps1
index 0569de4ad3..1dfc3b2dfd 100644
--- a/scripts/install-all.ps1
+++ b/scripts/install-all.ps1
@@ -2,13 +2,13 @@
 # Usage: Just run `powershell install-all.ps1` in nushell root directory
 
 Write-Output "-----------------------------------------------------------------"
-Write-Output "Installing nushell (nu) with dataframes and all the plugins"
+Write-Output "Installing nushell (nu) and all the plugins"
 Write-Output "-----------------------------------------------------------------"
 Write-Output ""
 
 Write-Output "Install nushell from local..."
 Write-Output "----------------------------------------------"
-cargo install --force --path . --features=dataframe --locked
+cargo install --force --path . --locked
 
 $NU_PLUGINS = @(
     'nu_plugin_example',
diff --git a/scripts/install-all.sh b/scripts/install-all.sh
index b53d53aa3b..b6c0f1fbfb 100755
--- a/scripts/install-all.sh
+++ b/scripts/install-all.sh
@@ -6,13 +6,13 @@ DIR=$(readlink -f $(dirname "${BASH_SOURCE[0]}"))
 REPO_ROOT=$(dirname $DIR)
 
 echo "-----------------------------------------------------------------"
-echo "Installing nushell (nu) with dataframes and all the plugins"
+echo "Installing nushell (nu) and all the plugins"
 echo "-----------------------------------------------------------------"
 echo ""
 
 echo "Install nushell from local..."
 echo "----------------------------------------------"
-cargo install --force --path "$REPO_ROOT" --features=dataframe --locked
+cargo install --force --path "$REPO_ROOT" --locked
 
 NU_PLUGINS=(
     'nu_plugin_inc'
diff --git a/src/main.rs b/src/main.rs
index d0fc023b68..bc48a9f7de 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -43,8 +43,6 @@ fn get_engine_state() -> EngineState {
     let engine_state = nu_cmd_plugin::add_plugin_command_context(engine_state);
     let engine_state = nu_command::add_shell_command_context(engine_state);
     let engine_state = nu_cmd_extra::add_extra_command_context(engine_state);
-    #[cfg(feature = "dataframe")]
-    let engine_state = nu_cmd_dataframe::add_dataframe_context(engine_state);
     let engine_state = nu_cli::add_cli_context(engine_state);
     nu_explore::add_explore_context(engine_state)
 }
diff --git a/toolkit.nu b/toolkit.nu
index 407998c514..6f19900fdb 100644
--- a/toolkit.nu
+++ b/toolkit.nu
@@ -514,7 +514,7 @@ export def "benchmark-and-log-result" [] {
     cargo export $"target/($current_branch)" -- bench
 
     ^$"./target/($current_branch)/benchmarks" compare -o -s 50 --dump $res_path
-}
+}
 
 # Build all Windows archives and MSIs for release manually
 #
@@ -536,25 +536,14 @@ export def 'release-pkg windows' [
     mkdir $artifacts_dir
     for target in ["aarch64" "x86_64"] {
         $env.TARGET = $target ++ "-pc-windows-msvc"
-        for release_type in ["" full] {
-            $env.RELEASE_TYPE = $release_type
-            $env.TARGET_RUSTFLAGS = if $release_type == "full" {
-                "--features=dataframe"
-            } else {
-                ""
-            }
-            let out_filename = if $release_type == "full" {
-                $target ++ "-windows-msvc-full"
-            } else {
-                $target ++ "-pc-windows-msvc"
-            }
-            rm -rf output
-            _EXTRA_=bin nu .github/workflows/release-pkg.nu
-            cp $"output/nu-($version)-($out_filename).zip" $artifacts_dir
-            rm -rf output
-            _EXTRA_=msi nu .github/workflows/release-pkg.nu
-            cp $"target/wix/nu-($version)-($out_filename).msi" $artifacts_dir
-        }
+
+        rm -rf output
+        _EXTRA_=bin nu .github/workflows/release-pkg.nu
+        cp $"output/nu-($version)-($target)-pc-windows-msvc.zip" $artifacts_dir
+
+        rm -rf output
+        _EXTRA_=msi nu .github/workflows/release-pkg.nu
+        cp $"target/wix/nu-($version)-($target)-pc-windows-msvc.msi" $artifacts_dir
     }
 }

From 6e050f5634031c1bdcc5b79ab34e8ed9a69fd08e Mon Sep 17 00:00:00 2001
From: Reilly Wood <26268125+rgwood@users.noreply.github.com>
Date: Mon, 20 May 2024 13:03:21 -0700
Subject: [PATCH 058/137] `explore`: consolidate padding config, handle
 ByteStream, tweak naming+comments (#12915)

Some minor changes to `explore`, continuing on my mission to simplify
the command in preparation for a larger UX overhaul:

1. Consolidate padding configuration. I don't think we need separate
config points for the (optional) index column and regular data columns
in the normal pager, they can share padding configuration. Likewise, in
the binary viewer all 3 columns (index, data, ASCII) had their
left+right padding configured independently.
2. Update `explore` so we use the binary viewer for the new `ByteStream`
type. `cat foo.txt | into binary | explore` was not using the binary
viewer after the `ByteStream` changes.
3. Tweak the naming of a few helper functions, add a comment

I've put the changes in separate commits to make them easier to review.

---------

Co-authored-by: Stefan Holderbach
---
 crates/nu-explore/src/commands/table.rs      | 12 ---
 crates/nu-explore/src/lib.rs                 | 14 ++-
 .../src/views/binary/binary_widget.rs        | 75 ++++++---------
 crates/nu-explore/src/views/binary/mod.rs    | 17 +---
 crates/nu-explore/src/views/record/mod.rs    | 26 ++---
 crates/nu-explore/src/views/record/tablew.rs | 96 ++++++++++---------
 6 files changed, 98 insertions(+), 142 deletions(-)

diff --git a/crates/nu-explore/src/commands/table.rs b/crates/nu-explore/src/commands/table.rs
index 7b778a8971..39ef8f0f99 100644
--- a/crates/nu-explore/src/commands/table.rs
+++ b/crates/nu-explore/src/commands/table.rs
@@ -25,8 +25,6 @@ struct TableSettings {
     selected_column_s: Option