From 3d8394a909d4d80829012e3a6f5f347a4e5f0a83 Mon Sep 17 00:00:00 2001
From: Luccas Mateus
Date: Thu, 2 Dec 2021 23:02:22 -0300
Subject: [PATCH] `to csv` and `to tsv` (#412)

* MathEval Variance and Stddev

* Fix tests and linting

* Typo

* Deal with streams when they are not tables

* ToTsv and ToCsv
---
 crates/nu-command/src/default_context.rs      |   2 +
 crates/nu-command/src/formats/to/csv.rs       | 114 ++++++++++++
 crates/nu-command/src/formats/to/delimited.rs | 169 ++++++++++++++++++
 crates/nu-command/src/formats/to/mod.rs       |   5 +
 crates/nu-command/src/formats/to/tsv.rs       |  71 ++++++++
 crates/nu-protocol/src/value/mod.rs           |  38 ++++
 6 files changed, 399 insertions(+)
 create mode 100644 crates/nu-command/src/formats/to/csv.rs
 create mode 100644 crates/nu-command/src/formats/to/delimited.rs
 create mode 100644 crates/nu-command/src/formats/to/tsv.rs

diff --git a/crates/nu-command/src/default_context.rs b/crates/nu-command/src/default_context.rs
index f003f45a2e..c0ab476201 100644
--- a/crates/nu-command/src/default_context.rs
+++ b/crates/nu-command/src/default_context.rs
@@ -157,6 +157,8 @@ pub fn create_default_context() -> EngineState {
             ToJson,
             ToUrl,
             ToToml,
+            ToTsv,
+            ToCsv,
             Touch,
             Use,
             Update,
diff --git a/crates/nu-command/src/formats/to/csv.rs b/crates/nu-command/src/formats/to/csv.rs
new file mode 100644
index 0000000000..269622ceaf
--- /dev/null
+++ b/crates/nu-command/src/formats/to/csv.rs
@@ -0,0 +1,114 @@
+use crate::formats::to::delimited::to_delimited_data;
+use nu_engine::CallExt;
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{
+    Category, Config, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
+    Value,
+};
+
+/// The `to csv` command: renders pipeline input as delimiter-separated text (default `,`).
+#[derive(Clone)]
+pub struct ToCsv;
+
+impl Command for ToCsv {
+    fn name(&self) -> &str {
+        "to csv"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("to csv")
+            .named(
+                "separator",
+                SyntaxShape::String,
+                "a character to separate columns, defaults to ','",
+                Some('s'),
+            )
+            .switch(
+                "noheaders",
+                "do not output the columns names as the first row",
+                Some('n'),
+            )
+            .category(Category::Formats)
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![
+            Example {
+                description: "Outputs an CSV string representing the contents of this table",
+                example: "[[foo bar]; [1 2]] | to csv",
+                result: Some(Value::test_string("foo,bar\n1,2\n")),
+            },
+            Example {
+                description: "Outputs an CSV string representing the contents of this table",
+                example: "[[foo bar]; [1 2]] | to csv -s ';' ",
+                result: Some(Value::test_string("foo;bar\n1;2\n")),
+            },
+        ]
+    }
+
+    fn usage(&self) -> &str {
+        "Convert table into .csv text "
+    }
+
+    fn run(
+        &self,
+        engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let head = call.head;
+        let noheaders = call.has_flag("noheaders");
+        let separator: Option<Spanned<String>> = call.get_flag(engine_state, stack, "separator")?;
+        let config = stack.get_config()?;
+        to_csv(input, noheaders, separator, head, config)
+    }
+}
+
+/// Resolves the `--separator` argument to a single delimiter and delegates to
+/// `to_delimited_data`.
+///
+/// No separator means `,`; the two-character string `\t` selects a tab. Anything
+/// else must be exactly one ASCII character, because the delimiter is narrowed to
+/// a single byte (`as u8`) before it reaches the CSV writer.
+fn to_csv(
+    input: PipelineData,
+    noheaders: bool,
+    separator: Option<Spanned<String>>,
+    head: Span,
+    config: Config,
+) -> Result<PipelineData, ShellError> {
+    let sep = match separator {
+        Some(Spanned { item: s, span, .. }) => {
+            if s == r"\t" {
+                '\t'
+            } else {
+                let vec_s: Vec<char> = s.chars().collect();
+                // A non-ASCII char would be silently truncated by the later `as u8` cast.
+                if vec_s.len() != 1 || !vec_s[0].is_ascii() {
+                    return Err(ShellError::UnsupportedInput(
+                        "Expected a single ASCII separator char from --separator".to_string(),
+                        span,
+                    ));
+                };
+                vec_s[0]
+            }
+        }
+        _ => ',',
+    };
+
+    to_delimited_data(noheaders, sep, "CSV", input, head, config)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(ToCsv {})
+    }
+}
diff --git a/crates/nu-command/src/formats/to/delimited.rs b/crates/nu-command/src/formats/to/delimited.rs
new file mode 100644
index 0000000000..6ba5dc1555
--- /dev/null
+++ b/crates/nu-command/src/formats/to/delimited.rs
@@ -0,0 +1,169 @@
+use csv::WriterBuilder;
+use indexmap::{indexset, IndexSet};
+use nu_protocol::{Config, IntoPipelineData, PipelineData, ShellError, Span, Value};
+use std::collections::VecDeque;
+
+/// Serializes `value` as delimiter-separated text.
+///
+/// * A record becomes a header row (its columns) followed by one row of values.
+/// * A list whose elements include records becomes a header row built from the
+///   union of their columns, then one row per element; absent cells are empty.
+/// * A list without any record columns is written as a single row of values.
+/// * Any other value is rendered on its own via `to_string_tagged_value`.
+///
+/// `separator` is narrowed with `as u8`, so callers must pass an ASCII character.
+fn from_value_to_delimited_string(
+    value: &Value,
+    separator: char,
+    config: &Config,
+) -> Result<String, ShellError> {
+    match value {
+        Value::Record { cols, vals, span } => {
+            let mut wtr = WriterBuilder::new()
+                .delimiter(separator as u8)
+                .from_writer(vec![]);
+            let mut fields: VecDeque<String> = VecDeque::new();
+            let mut values: VecDeque<String> = VecDeque::new();
+
+            for (k, v) in cols.iter().zip(vals.iter()) {
+                fields.push_back(k.clone());
+
+                values.push_back(to_string_tagged_value(v, config)?);
+            }
+
+            wtr.write_record(fields).expect("can not write.");
+            wtr.write_record(values).expect("can not write.");
+
+            let v = String::from_utf8(wtr.into_inner().map_err(|_| {
+                ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
+            })?)
+            .map_err(|_| {
+                ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
+            })?;
+            Ok(v)
+        }
+        Value::List { vals, span } => {
+            let mut wtr = WriterBuilder::new()
+                .delimiter(separator as u8)
+                .from_writer(vec![]);
+
+            let merged_descriptors = merge_descriptors(vals);
+
+            if merged_descriptors.is_empty() {
+                wtr.write_record(
+                    vals.iter()
+                        .map(|ele| {
+                            to_string_tagged_value(ele, config).unwrap_or_else(|_| String::new())
+                        })
+                        .collect::<Vec<String>>(),
+                )
+                .expect("can not write");
+            } else {
+                wtr.write_record(merged_descriptors.iter().map(|item| &item[..]))
+                    .expect("can not write.");
+
+                for l in vals {
+                    let mut row = vec![];
+                    for desc in &merged_descriptors {
+                        row.push(match l.get_data_by_key(desc) {
+                            Some(s) => to_string_tagged_value(&s, config)?,
+                            None => String::new(),
+                        });
+                    }
+                    wtr.write_record(&row).expect("can not write");
+                }
+            }
+            let v = String::from_utf8(wtr.into_inner().map_err(|_| {
+                ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
+            })?)
+            .map_err(|_| {
+                ShellError::UnsupportedInput("Could not convert record".to_string(), *span)
+            })?;
+            Ok(v)
+        }
+        _ => to_string_tagged_value(value, config),
+    }
+}
+
+/// Renders a single cell. Most scalar variants go through `Value::into_string`,
+/// dates use their own `to_string`, `Nothing` is empty, and nested lists and
+/// records collapse to the placeholders `[Table]`, `[List]` and `[Row]`. Any
+/// other variant is reported as `ShellError::UnsupportedInput`.
+fn to_string_tagged_value(v: &Value, config: &Config) -> Result<String, ShellError> {
+    match &v {
+        Value::String { .. }
+        | Value::Bool { .. }
+        | Value::Int { .. }
+        | Value::Duration { .. }
+        | Value::Binary { .. }
+        | Value::CustomValue { .. }
+        | Value::Error { .. }
+        | Value::Filesize { .. }
+        | Value::CellPath { .. }
+        | Value::Float { .. } => Ok(v.clone().into_string("", config)),
+        Value::Date { val, .. } => Ok(val.to_string()),
+        Value::Nothing { .. } => Ok(String::new()),
+        Value::List { ref vals, .. } => match &vals[..] {
+            [Value::Record { .. }, _end @ ..] => Ok(String::from("[Table]")),
+            _ => Ok(String::from("[List]")),
+        },
+        Value::Record { .. } => Ok(String::from("[Row]")),
+        _ => Err(ShellError::UnsupportedInput(
+            "Unexpected value".to_string(),
+            v.span().unwrap_or_else(|_| Span::unknown()),
+        )),
+    }
+}
+
+/// Collects the column names of every record in `values`, de-duplicated and in
+/// first-seen order. Non-record elements contribute nothing.
+fn merge_descriptors(values: &[Value]) -> Vec<String> {
+    let mut ret: Vec<String> = vec![];
+    let mut seen: IndexSet<String> = indexset! {};
+    for value in values {
+        let data_descriptors = match value {
+            Value::Record { cols, .. } => cols.to_owned(),
+            _ => vec![],
+        };
+        for desc in data_descriptors {
+            if !seen.contains(&desc) {
+                seen.insert(desc.to_string());
+                ret.push(desc.to_string());
+            }
+        }
+    }
+    ret
+}
+
+/// Collects `input` into a single value and converts it to delimited text.
+///
+/// With `noheaders`, everything up to and including the first `\n` of the output
+/// is dropped. Any conversion failure is reported as `ShellError::CantConvert`
+/// naming `format_name`.
+pub fn to_delimited_data(
+    noheaders: bool,
+    sep: char,
+    format_name: &'static str,
+    input: PipelineData,
+    span: Span,
+    config: Config,
+) -> Result<PipelineData, ShellError> {
+    let value = input.into_value(span);
+    let output = match from_value_to_delimited_string(&value, sep, &config) {
+        Ok(mut x) => {
+            if noheaders {
+                if let Some(second_line) = x.find('\n') {
+                    let start = second_line + 1;
+                    x.replace_range(0..start, "");
+                }
+            }
+            Ok(x)
+        }
+        Err(_) => Err(ShellError::CantConvert(
+            format_name.into(),
+            value.get_type().to_string(),
+            value.span().unwrap_or_else(|_| Span::unknown()),
+        )),
+    }?;
+    Ok(Value::string(output, span).into_pipeline_data())
+}
diff --git a/crates/nu-command/src/formats/to/mod.rs b/crates/nu-command/src/formats/to/mod.rs
index 456297235f..29f631915c 100644
--- a/crates/nu-command/src/formats/to/mod.rs
+++ b/crates/nu-command/src/formats/to/mod.rs
@@ -1,9 +1,14 @@
 mod command;
+mod csv;
+mod delimited;
 mod json;
 mod toml;
+mod tsv;
 mod url;
 
+pub use self::csv::ToCsv;
 pub use self::toml::ToToml;
 pub use command::To;
 pub use json::ToJson;
+pub use tsv::ToTsv;
 pub use url::ToUrl;
diff --git a/crates/nu-command/src/formats/to/tsv.rs b/crates/nu-command/src/formats/to/tsv.rs
new file mode 100644
index 0000000000..70d4d3ea25
--- /dev/null
+++ b/crates/nu-command/src/formats/to/tsv.rs
@@ -0,0 +1,71 @@
+use crate::formats::to::delimited::to_delimited_data;
+use nu_protocol::ast::Call;
+use nu_protocol::engine::{Command, EngineState, Stack};
+use nu_protocol::{Category, Config, Example, PipelineData, ShellError, Signature, Span, Value};
+
+/// The `to tsv` command: renders pipeline input as tab-separated text.
+#[derive(Clone)]
+pub struct ToTsv;
+
+impl Command for ToTsv {
+    fn name(&self) -> &str {
+        "to tsv"
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::build("to tsv")
+            .switch(
+                "noheaders",
+                "do not output the column names as the first row",
+                Some('n'),
+            )
+            .category(Category::Formats)
+    }
+
+    fn usage(&self) -> &str {
+        "Convert table into .tsv text"
+    }
+
+    fn examples(&self) -> Vec<Example> {
+        vec![Example {
+            description: "Outputs an TSV string representing the contents of this table",
+            example: "[[foo bar]; [1 2]] | to tsv",
+            result: Some(Value::test_string("foo\tbar\n1\t2\n")),
+        }]
+    }
+
+    fn run(
+        &self,
+        _engine_state: &EngineState,
+        stack: &mut Stack,
+        call: &Call,
+        input: PipelineData,
+    ) -> Result<PipelineData, ShellError> {
+        let head = call.head;
+        let noheaders = call.has_flag("noheaders");
+        let config = stack.get_config()?;
+        to_tsv(input, noheaders, head, config)
+    }
+}
+
+/// Thin wrapper around `to_delimited_data` with a fixed tab delimiter.
+fn to_tsv(
+    input: PipelineData,
+    noheaders: bool,
+    head: Span,
+    config: Config,
+) -> Result<PipelineData, ShellError> {
+    to_delimited_data(noheaders, '\t', "TSV", input, head, config)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_examples() {
+        use crate::test_examples;
+
+        test_examples(ToTsv {})
+    }
+}
diff --git a/crates/nu-protocol/src/value/mod.rs b/crates/nu-protocol/src/value/mod.rs
index 3ae34542c6..b2aebbd95a 100644
--- a/crates/nu-protocol/src/value/mod.rs
+++ b/crates/nu-protocol/src/value/mod.rs
@@ -287,6 +287,44 @@ impl Value {
         }
     }
 
+    /// Looks up the column `name` in this value.
+    ///
+    /// For a record, returns a clone of the matching column's value. For a list,
+    /// returns a list with one entry per element (the looked-up value for records
+    /// that have the column, `Nothing` otherwise), or `None` when the list is
+    /// empty. Every other variant yields `None`.
+    pub fn get_data_by_key(&self, name: &str) -> Option<Value> {
+        match self {
+            Value::Record { cols, vals, .. } => cols
+                .iter()
+                .zip(vals.iter())
+                .find(|(col, _)| col == &name)
+                .map(|(_, val)| val.clone()),
+            Value::List { vals, span } => {
+                let mut out = vec![];
+                for item in vals {
+                    match item {
+                        Value::Record { .. } => match item.get_data_by_key(name) {
+                            Some(v) => out.push(v),
+                            None => out.push(Value::nothing(*span)),
+                        },
+                        _ => out.push(Value::nothing(*span)),
+                    }
+                }
+
+                if !out.is_empty() {
+                    Some(Value::List {
+                        vals: out,
+                        span: *span,
+                    })
+                } else {
+                    None
+                }
+            }
+            _ => None,
+        }
+    }
+
     /// Convert Value into string. Note that Streams will be consumed.
     pub fn into_string(self, separator: &str, config: &Config) -> String {
         match self {