diff --git a/crates/nu-command/src/formats/from/xml.rs b/crates/nu-command/src/formats/from/xml.rs index ee29262e20..d39061fc81 100644 --- a/crates/nu-command/src/formats/from/xml.rs +++ b/crates/nu-command/src/formats/from/xml.rs @@ -1,3 +1,4 @@ +use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME}; use indexmap::map::IndexMap; use nu_protocol::ast::Call; use nu_protocol::engine::{Command, EngineState, Stack}; @@ -5,6 +6,7 @@ use nu_protocol::{ Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, Type, Value, }; +use roxmltree::NodeType; #[derive(Clone)] pub struct FromXml; @@ -17,6 +19,12 @@ impl Command for FromXml { fn signature(&self) -> Signature { Signature::build("from xml") .input_output_types(vec![(Type::String, Type::Record(vec![]))]) + .switch("keep-comments", "add comment nodes to result", None) + .switch( + "keep-pi", + "add processing instruction nodes to result", + None, + ) .category(Category::Formats) } @@ -24,6 +32,18 @@ impl Command for FromXml { "Parse text as .xml and create record." } + fn extra_usage(&self) -> &str { + r#"Every XML entry is represented via a record with tag, attributes and content fields. +To represent different types of entries different values are written to these fields: +1. Tag entry: {tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]} +2. Comment entry: {tag: '!' attributes: null content: "<comment string>"} +3. Processing instruction (PI): {tag: '?<pi name>' attributes: null content: "<pi content string>"} +4. Text: {tag: null attributes: null content: "<text>"}. + +Unlike to xml command all null values are always present and text is never represented via plain +string. 
This way content of every tag is always a table and is easier to parse"# + } + fn run( &self, _engine_state: &EngineState, @@ -32,7 +52,14 @@ impl Command for FromXml { input: PipelineData, ) -> Result { let head = call.head; - from_xml(input, head) + let keep_comments = call.has_flag("keep-comments"); + let keep_processing_instructions = call.has_flag("keep-pi"); + let info = ParsingInfo { + span: head, + keep_comments, + keep_processing_instructions, + }; + from_xml(input, &info) } fn examples(&self) -> Vec { @@ -42,51 +69,52 @@ impl Command for FromXml { Event ' | from xml"#, description: "Converts xml formatted string to record", - result: Some(Value::Record { - cols: vec!["note".to_string()], - vals: vec![Value::Record { - cols: vec!["children".to_string(), "attributes".to_string()], - vals: vec![ - Value::List { - vals: vec![Value::Record { - cols: vec!["remember".to_string()], - vals: vec![Value::Record { - cols: vec!["children".to_string(), "attributes".to_string()], - vals: vec![ - Value::List { - vals: vec![Value::test_string("Event")], - span: Span::test_data(), - }, - Value::Record { - cols: vec![], - vals: vec![], - span: Span::test_data(), - }, - ], - span: Span::test_data(), - }], - span: Span::test_data(), - }], - span: Span::test_data(), - }, - Value::Record { - cols: vec![], - vals: vec![], - span: Span::test_data(), - }, - ], - span: Span::test_data(), - }], - span: Span::test_data(), - }), + result: Some(Value::test_record( + vec![COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME], + vec![ + Value::test_string("note"), + Value::test_record(Vec::<&str>::new(), vec![]), + Value::list( + vec![Value::test_record( + vec![COLUMN_TAG_NAME, COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME], + vec![ + Value::test_string("remember"), + Value::test_record(Vec::<&str>::new(), vec![]), + Value::list( + vec![Value::test_record( + vec![ + COLUMN_TAG_NAME, + COLUMN_ATTRS_NAME, + COLUMN_CONTENT_NAME, + ], + vec![ + Value::test_nothing(), + Value::test_nothing(), + 
Value::test_string("Event"), + ], + )], + Span::test_data(), + ), + ], + )], + Span::test_data(), + ), + ], + )), }] } } -fn from_attributes_to_value(attributes: &[roxmltree::Attribute], span: Span) -> Value { +struct ParsingInfo { + span: Span, + keep_comments: bool, + keep_processing_instructions: bool, +} + +fn from_attributes_to_value(attributes: &[roxmltree::Attribute], info: &ParsingInfo) -> Value { let mut collected = IndexMap::new(); for a in attributes { - collected.insert(String::from(a.name()), Value::string(a.value(), span)); + collected.insert(String::from(a.name()), Value::string(a.value(), info.span)); } let (cols, vals) = collected @@ -97,97 +125,205 @@ fn from_attributes_to_value(attributes: &[roxmltree::Attribute], span: Span) -> acc }); - Value::Record { cols, vals, span } -} - -fn from_node_to_value(n: &roxmltree::Node, span: Span) -> Value { - if n.is_element() { - let name = n.tag_name().name().trim().to_string(); - - let mut children_values = vec![]; - for c in n.children() { - children_values.push(from_node_to_value(&c, span)); - } - - let children_values: Vec = children_values - .into_iter() - .filter(|x| match x { - Value::String { val: f, .. } => { - !f.trim().is_empty() // non-whitespace characters? 
- } - _ => true, - }) - .collect(); - - let mut collected = IndexMap::new(); - - let attribute_value: Value = - from_attributes_to_value(&n.attributes().collect::>(), span); - - let mut row = IndexMap::new(); - row.insert( - String::from("children"), - Value::List { - vals: children_values, - span, - }, - ); - row.insert(String::from("attributes"), attribute_value); - collected.insert(name, Value::from(Spanned { item: row, span })); - - Value::from(Spanned { - item: collected, - span, - }) - } else if n.is_comment() { - Value::String { - val: "".to_string(), - span, - } - } else if n.is_pi() { - Value::String { - val: "".to_string(), - span, - } - } else if n.is_text() { - match n.text() { - Some(text) => Value::String { - val: text.to_string(), - span, - }, - None => Value::String { - val: "".to_string(), - span, - }, - } - } else { - Value::String { - val: "".to_string(), - span, - } + Value::Record { + cols, + vals, + span: info.span, } } -fn from_document_to_value(d: &roxmltree::Document, span: Span) -> Value { - from_node_to_value(&d.root_element(), span) +fn element_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Value { + let span = info.span; + let mut node = IndexMap::new(); + + let tag = n.tag_name().name().trim().to_string(); + let tag = Value::string(tag, span); + + let content: Vec = n + .children() + .into_iter() + .filter_map(|node| from_node_to_value(&node, info)) + .collect(); + let content = Value::list(content, span); + + let attributes = from_attributes_to_value(&n.attributes().collect::>(), info); + + node.insert(String::from(COLUMN_TAG_NAME), tag); + node.insert(String::from(COLUMN_ATTRS_NAME), attributes); + node.insert(String::from(COLUMN_CONTENT_NAME), content); + + Value::from(Spanned { item: node, span }) } -pub fn from_xml_string_to_value(s: String, span: Span) -> Result { +fn text_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option { + let span = info.span; + let text = n.text().expect("Non-text node supplied to 
text_to_value"); + let text = text.trim(); + if text.is_empty() { + None + } else { + let mut node = IndexMap::new(); + let content = Value::string(String::from(text), span); + + node.insert(String::from(COLUMN_TAG_NAME), Value::nothing(span)); + node.insert(String::from(COLUMN_ATTRS_NAME), Value::nothing(span)); + node.insert(String::from(COLUMN_CONTENT_NAME), content); + + let result = Value::from(Spanned { item: node, span }); + + Some(result) + } +} + +fn comment_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option { + if info.keep_comments { + let span = info.span; + let text = n + .text() + .expect("Non-comment node supplied to comment_to_value"); + + let mut node = IndexMap::new(); + let content = Value::string(String::from(text), span); + + node.insert(String::from(COLUMN_TAG_NAME), Value::string("!", span)); + node.insert(String::from(COLUMN_ATTRS_NAME), Value::nothing(span)); + node.insert(String::from(COLUMN_CONTENT_NAME), content); + + let result = Value::from(Spanned { item: node, span }); + + Some(result) + } else { + None + } +} + +fn processing_instruction_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option { + if info.keep_processing_instructions { + let span = info.span; + let pi = n.pi()?; + + let mut node = IndexMap::new(); + // Add '?' 
before target to differentiate tags from pi targets + let tag = format!("?{}", pi.target); + let tag = Value::string(tag, span); + let content = pi + .value + .map_or_else(|| Value::nothing(span), |x| Value::string(x, span)); + + node.insert(String::from(COLUMN_TAG_NAME), tag); + node.insert(String::from(COLUMN_ATTRS_NAME), Value::nothing(span)); + node.insert(String::from(COLUMN_CONTENT_NAME), content); + + let result = Value::from(Spanned { item: node, span }); + + Some(result) + } else { + None + } +} + +fn from_node_to_value(n: &roxmltree::Node, info: &ParsingInfo) -> Option { + match n.node_type() { + NodeType::Element => Some(element_to_value(n, info)), + NodeType::Text => text_to_value(n, info), + NodeType::Comment => comment_to_value(n, info), + NodeType::PI => processing_instruction_to_value(n, info), + _ => None, + } +} + +fn from_document_to_value(d: &roxmltree::Document, info: &ParsingInfo) -> Value { + element_to_value(&d.root_element(), info) +} + +fn from_xml_string_to_value(s: String, info: &ParsingInfo) -> Result { let parsed = roxmltree::Document::parse(&s)?; - Ok(from_document_to_value(&parsed, span)) + Ok(from_document_to_value(&parsed, info)) } -fn from_xml(input: PipelineData, head: Span) -> Result { - let (concat_string, span, metadata) = input.collect_string_strict(head)?; +fn from_xml(input: PipelineData, info: &ParsingInfo) -> Result { + let (concat_string, span, metadata) = input.collect_string_strict(info.span)?; - match from_xml_string_to_value(concat_string, head) { + match from_xml_string_to_value(concat_string, info) { Ok(x) => Ok(x.into_pipeline_data_with_metadata(metadata)), - _ => Err(ShellError::UnsupportedInput( - "Could not parse string as XML".to_string(), - "value originates from here".into(), - head, + Err(err) => Err(process_xml_parse_error(err, span)), + } +} + +fn process_xml_parse_error(err: roxmltree::Error, span: Span) -> ShellError { + match err { + roxmltree::Error::InvalidXmlPrefixUri(_) => make_cant_convert_error( 
+ "The `xmlns:xml` attribute must have an URI.", span, - )), + ), + roxmltree::Error::UnexpectedXmlUri(_) => make_cant_convert_error( + "Only the xmlns:xml attribute can have the http://www.w3.org/XML/1998/namespace URI.", + span, + ), + roxmltree::Error::UnexpectedXmlnsUri(_) => make_cant_convert_error( + "The http://www.w3.org/2000/xmlns/ URI must not be declared.", + span, + ), + roxmltree::Error::InvalidElementNamePrefix(_) => { + make_cant_convert_error("xmlns can't be used as an element prefix.", span) + } + roxmltree::Error::DuplicatedNamespace(_, _) => { + make_cant_convert_error("A namespace was already defined on this element.", span) + } + roxmltree::Error::UnknownNamespace(prefix, _) => { + make_cant_convert_error(format!("Unknown prefix {}", prefix), span) + } + roxmltree::Error::UnexpectedCloseTag { .. } => { + make_cant_convert_error("Unexpected close tag", span) + } + roxmltree::Error::UnexpectedEntityCloseTag(_) => { + make_cant_convert_error("Entity value starts with a close tag.", span) + } + roxmltree::Error::UnknownEntityReference(_, _) => make_cant_convert_error( + "A reference to an entity that was not defined in the DTD.", + span, + ), + roxmltree::Error::MalformedEntityReference(_) => { + make_cant_convert_error("A malformed entity reference.", span) + } + roxmltree::Error::EntityReferenceLoop(_) => { + make_cant_convert_error("A possible entity reference loop.", span) + } + roxmltree::Error::InvalidAttributeValue(_) => { + make_cant_convert_error("Attribute value cannot have a < character.", span) + } + roxmltree::Error::DuplicatedAttribute(_, _) => { + make_cant_convert_error("An element has a duplicated attributes.", span) + } + roxmltree::Error::NoRootNode => { + make_cant_convert_error("The XML document must have at least one element.", span) + } + roxmltree::Error::UnclosedRootNode => { + make_cant_convert_error("The root node was opened but never closed.", span) + } + roxmltree::Error::DtdDetected => make_cant_convert_error( + "An 
XML with DTD detected. DTDs are currently disabled due to security reasons.", + span, + ), + roxmltree::Error::NodesLimitReached => { + make_cant_convert_error("Node limit was reached.", span) + } + roxmltree::Error::AttributesLimitReached => { + make_cant_convert_error("Attribute limit reached", span) + } + roxmltree::Error::NamespacesLimitReached => { + make_cant_convert_error("Namespace limit reached", span) + } + roxmltree::Error::ParserError(_) => make_cant_convert_error("Parser error", span), + } +} + +fn make_cant_convert_error(help: impl Into, span: Span) -> ShellError { + ShellError::CantConvert { + from_type: Type::String.to_string(), + to_type: "XML".to_string(), + span, + help: Some(help.into()), } } @@ -203,9 +339,12 @@ mod tests { Value::test_string(input) } - fn row(entries: IndexMap) -> Value { + fn attributes(entries: IndexMap<&str, &str>) -> Value { Value::from(Spanned { - item: entries, + item: entries + .into_iter() + .map(|(k, v)| (k.into(), string(v))) + .collect::>(), span: Span::test_data(), }) } @@ -217,23 +356,46 @@ mod tests { } } + fn content_tag( + tag: impl Into, + attrs: IndexMap<&str, &str>, + content: &[Value], + ) -> Value { + Value::from(Spanned { + item: indexmap! { + COLUMN_TAG_NAME.into() => string(tag), + COLUMN_ATTRS_NAME.into() => attributes(attrs), + COLUMN_CONTENT_NAME.into() => table(content), + }, + span: Span::test_data(), + }) + } + + fn content_string(value: impl Into) -> Value { + Value::from(Spanned { + item: indexmap! 
{ + COLUMN_TAG_NAME.into() => Value::nothing(Span::test_data()), + COLUMN_ATTRS_NAME.into() => Value::nothing(Span::test_data()), + COLUMN_CONTENT_NAME.into() => string(value), + }, + span: Span::test_data(), + }) + } + fn parse(xml: &str) -> Result { - from_xml_string_to_value(xml.to_string(), Span::test_data()) + let info = ParsingInfo { + span: Span::test_data(), + keep_comments: false, + keep_processing_instructions: false, + }; + from_xml_string_to_value(xml.to_string(), &info) } #[test] fn parses_empty_element() -> Result<(), roxmltree::Error> { let source = ""; - assert_eq!( - parse(source)?, - row(indexmap! { - "nu".into() => row(indexmap! { - "children".into() => table(&[]), - "attributes".into() => row(indexmap! {}) - }) - }) - ); + assert_eq!(parse(source)?, content_tag("nu", indexmap! {}, &vec![])); Ok(()) } @@ -244,12 +406,11 @@ mod tests { assert_eq!( parse(source)?, - row(indexmap! { - "nu".into() => row(indexmap! { - "children".into() => table(&[string("La era de los tres caballeros")]), - "attributes".into() => row(indexmap! {}) - }) - }) + content_tag( + "nu", + indexmap! {}, + &vec![content_string("La era de los tres caballeros")] + ) ); Ok(()) @@ -266,31 +427,15 @@ mod tests { assert_eq!( parse(source)?, - row(indexmap! { - "nu".into() => row(indexmap! { - "children".into() => table(&[ - row(indexmap! { - "dev".into() => row(indexmap! { - "children".into() => table(&[string("Andrés")]), - "attributes".into() => row(indexmap! {}) - }) - }), - row(indexmap! { - "dev".into() => row(indexmap! { - "children".into() => table(&[string("Jonathan")]), - "attributes".into() => row(indexmap! {}) - }) - }), - row(indexmap! { - "dev".into() => row(indexmap! { - "children".into() => table(&[string("Yehuda")]), - "attributes".into() => row(indexmap! {}) - }) - }) - ]), - "attributes".into() => row(indexmap! {}) - }) - }) + content_tag( + "nu", + indexmap! {}, + &vec![ + content_tag("dev", indexmap! 
{}, &vec![content_string("Andrés")]), + content_tag("dev", indexmap! {}, &vec![content_string("Jonathan")]), + content_tag("dev", indexmap! {}, &vec![content_string("Yehuda")]) + ] + ) ); Ok(()) @@ -304,14 +449,7 @@ mod tests { assert_eq!( parse(source)?, - row(indexmap! { - "nu".into() => row(indexmap! { - "children".into() => table(&[]), - "attributes".into() => row(indexmap! { - "version".into() => string("2.0") - }) - }) - }) + content_tag("nu", indexmap! {"version" => "2.0"}, &vec![]) ); Ok(()) @@ -326,21 +464,15 @@ mod tests { assert_eq!( parse(source)?, - row(indexmap! { - "nu".into() => row(indexmap! { - "children".into() => table(&[ - row(indexmap! { - "version".into() => row(indexmap! { - "children".into() => table(&[string("2.0")]), - "attributes".into() => row(indexmap! {}) - }) - }) - ]), - "attributes".into() => row(indexmap! { - "version".into() => string("2.0") - }) - }) - }) + content_tag( + "nu", + indexmap! {"version" => "2.0"}, + &vec![content_tag( + "version", + indexmap! {}, + &vec![content_string("2.0")] + )] + ) ); Ok(()) @@ -354,15 +486,7 @@ mod tests { assert_eq!( parse(source)?, - row(indexmap! { - "nu".into() => row(indexmap! { - "children".into() => table(&[]), - "attributes".into() => row(indexmap! { - "version".into() => string("2.0"), - "age".into() => string("25") - }) - }) - }) + content_tag("nu", indexmap! 
{"version" => "2.0", "age" => "25"}, &vec![]) ); Ok(()) diff --git a/crates/nu-command/src/formats/mod.rs b/crates/nu-command/src/formats/mod.rs index 86f06f85fd..43e54db51f 100644 --- a/crates/nu-command/src/formats/mod.rs +++ b/crates/nu-command/src/formats/mod.rs @@ -1,4 +1,5 @@ mod from; +mod nu_xml_format; mod to; pub use from::*; diff --git a/crates/nu-command/src/formats/nu_xml_format.rs b/crates/nu-command/src/formats/nu_xml_format.rs new file mode 100644 index 0000000000..83a1ce7777 --- /dev/null +++ b/crates/nu-command/src/formats/nu_xml_format.rs @@ -0,0 +1,3 @@ +pub const COLUMN_TAG_NAME: &str = "tag"; +pub const COLUMN_ATTRS_NAME: &str = "attributes"; +pub const COLUMN_CONTENT_NAME: &str = "content"; diff --git a/crates/nu-command/src/formats/to/xml.rs b/crates/nu-command/src/formats/to/xml.rs index 1d01554657..105549ecca 100644 --- a/crates/nu-command/src/formats/to/xml.rs +++ b/crates/nu-command/src/formats/to/xml.rs @@ -1,13 +1,13 @@ +use crate::formats::nu_xml_format::{COLUMN_ATTRS_NAME, COLUMN_CONTENT_NAME, COLUMN_TAG_NAME}; use indexmap::IndexMap; use nu_engine::CallExt; use nu_protocol::ast::Call; use nu_protocol::engine::{Command, EngineState, Stack}; use nu_protocol::{ - Category, Config, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, - Spanned, SyntaxShape, Type, Value, + Category, Example, IntoPipelineData, PipelineData, ShellError, Signature, Span, Spanned, + SyntaxShape, Type, Value, }; use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event}; -use std::collections::HashSet; use std::io::Cursor; use std::io::Write; @@ -31,18 +31,36 @@ impl Command for ToXml { .category(Category::Formats) } + fn extra_usage(&self) -> &str { + r#"Every XML entry is represented via a record with tag, attributes and content fields. +To represent different types of entries different values must be written to these fields: +1. Tag entry: {tag: <tag name> attributes: {<attr name>: "<string value>" ...} content: [<entries>]} +2. Comment entry: {tag: '!' attributes: null content: "<comment string>"} +3. 
Processing instruction (PI): {tag: '?<pi name>' attributes: null content: "<pi content string>"} +4. Text: {tag: null attributes: null content: "<text>"}. Or as plain "<text>" instead of record. + +Additionally any field which is: empty record, empty list or null, can be omitted."# + } + fn examples(&self) -> Vec<Example> { vec![ Example { description: "Outputs an XML string representing the contents of this table", - example: r#"{ "note": { "children": [{ "remember": {"attributes" : {}, "children": [Event]}}], "attributes": {} } } | to xml"#, + example: r#"{tag: note attributes: {} content : [{tag: remember attributes: {} content : [{tag: null attributes: null content : Event}]}]} | to xml"#, + result: Some(Value::test_string( + "<note><remember>Event</remember></note>", + )), + }, + Example { + description: "When formatting xml null and empty record fields can be omitted and strings can be written without a wrapping record", + example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml"#, result: Some(Value::test_string( "<note><remember>Event</remember></note>", )), }, Example { description: "Optionally, formats the text with a custom indentation setting", - example: r#"{ "note": { "children": [{ "remember": {"attributes" : {}, "children": [Event]}}], "attributes": {} } } | to xml -p 3"#, + example: r#"{tag: note content : [{tag: remember content : [Event]}]} | to xml -p 3"#, result: Some(Value::test_string( "<note>\n   <remember>Event</remember>\n</note>", )), @@ -51,7 +69,7 @@ impl Command for ToXml { } fn usage(&self) -> &str { - "Convert table into .xml text." + "Convert special record structure into .xml text." 
} fn run( @@ -62,110 +80,316 @@ impl Command for ToXml { input: PipelineData, ) -> Result { let head = call.head; - let config = engine_state.get_config(); let pretty: Option> = call.get_flag(engine_state, stack, "pretty")?; let input = input.try_expand_range()?; - to_xml(input, head, pretty, config) + to_xml(input, head, pretty) } } -pub fn add_attributes<'a>( - element: &mut quick_xml::events::BytesStart<'a>, - attributes: &'a IndexMap, -) { +pub fn add_attributes<'a>(element: &mut BytesStart<'a>, attributes: &'a IndexMap) { for (k, v) in attributes { element.push_attribute((k.as_str(), v.as_str())); } } -pub fn get_attributes(row: &Value, config: &Config) -> Option> { - if let Value::Record { .. } = row { - if let Some(Value::Record { cols, vals, .. }) = row.get_data_by_key("attributes") { - let mut h = IndexMap::new(); - for (k, v) in cols.iter().zip(vals.iter()) { - h.insert(k.clone(), v.clone().into_abbreviated_string(config)); - } - return Some(h); - } - } - None -} - -pub fn get_children(row: &Value) -> Option> { - if let Value::Record { .. } = row { - if let Some(Value::List { vals, .. }) = row.get_data_by_key("children") { - return Some(vals); - } - } - None -} - -pub fn is_xml_row(row: &Value) -> bool { - if let Value::Record { cols, .. 
} = &row { - let keys: HashSet<&String> = cols.iter().collect(); - let children: String = "children".to_string(); - let attributes: String = "attributes".to_string(); - return keys.contains(&children) && keys.contains(&attributes) && keys.len() == 2; - } - false -} - -pub fn write_xml_events( - current: Value, +fn to_xml_entry( + entry: Value, + top_level: bool, writer: &mut quick_xml::Writer, - config: &Config, ) -> Result<(), ShellError> { - match current { - Value::Record { cols, vals, span } => { - for (k, v) in cols.iter().zip(vals.iter()) { - let mut e = BytesStart::new(k); - if !is_xml_row(v) { - return Err(ShellError::GenericError( - "Expected a row with 'children' and 'attributes' columns".to_string(), - "missing 'children' and 'attributes' columns ".to_string(), - Some(span), - None, - Vec::new(), - )); - } - let a = get_attributes(v, config); - if let Some(ref a) = a { - add_attributes(&mut e, a); - } - writer - .write_event(Event::Start(e)) - .expect("Couldn't open XML node"); - let c = get_children(v); - if let Some(c) = c { - for v in c { - write_xml_events(v, writer, config)?; - } - } - writer - .write_event(Event::End(BytesEnd::new(k))) - .expect("Couldn't close XML node"); + let entry_span = entry.span()?; + + // Allow using strings directly as content. + // So user can write + // {tag: a content: ['qwe']} + // instead of longer + // {tag: a content: [{content: 'qwe'}]} + if let (Value::String { val, span }, false) = (&entry, top_level) { + return to_xml_text(val.as_str(), *span, writer); + } + + if !matches!(entry, Value::Record { .. }) { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: entry.get_type().to_string(), + span: entry_span, + help: Some("Xml entry expected to be a record".into()), + }); + }; + + // If key is not found it is assumed to be nothing. 
This way + // user can write a tag like {tag: a content: [...]} instead + // of longer {tag: a attributes: {} content: [...]} + let tag = entry + .get_data_by_key(COLUMN_TAG_NAME) + .unwrap_or_else(|| Value::nothing(Span::unknown())); + let attrs = entry + .get_data_by_key(COLUMN_ATTRS_NAME) + .unwrap_or_else(|| Value::nothing(Span::unknown())); + let content = entry + .get_data_by_key(COLUMN_CONTENT_NAME) + .unwrap_or_else(|| Value::nothing(Span::unknown())); + + match (tag, attrs, content) { + (Value::Nothing { .. }, Value::Nothing { .. }, Value::String { val, span }) => { + // Strings can not appear on top level of document + if top_level { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: entry.get_type().to_string(), + span: entry_span, + help: Some("Strings can not be a root element of document".into()), + }); } + to_xml_text(val.as_str(), span, writer) } - Value::List { vals, .. } => { - for v in vals { - write_xml_events(v, writer, config)?; + ( + Value::String { + val: tag_name, + span: tag_span, + }, + attrs, + children, + ) => to_tag_like( + entry_span, tag_name, tag_span, attrs, children, top_level, writer, + ), + // Any other shape (missing or non-string tag, text entry with non-string + // content) is malformed: report it instead of silently writing nothing. + _ => Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: entry.get_type().to_string(), + span: entry_span, + help: Some("Tag missing or is not a string".into()), + }), + } +} + +/// Convert record to tag-like entry: tag, PI, comment. +fn to_tag_like<W: Write>( + entry_span: Span, + tag: String, + tag_span: Span, + attrs: Value, + content: Value, + top_level: bool, + writer: &mut quick_xml::Writer<W>, +) -> Result<(), ShellError> { + if tag == "!" 
{ + // Comments can not appear on top level of document + if top_level { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: "record".into(), + span: entry_span, + help: Some("Comments can not be a root element of document".into()), + }); + } + + to_comment(entry_span, attrs, content, writer) + } else if let Some(tag) = tag.strip_prefix('?') { + // PIs can not appear on top level of document + if top_level { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: Type::Record(vec![]).to_string(), + span: entry_span, + help: Some("PIs can not be a root element of document".into()), + }); + } + + let content: String = match content { + Value::String { val, .. } => val, + Value::Nothing { .. } => "".into(), + _ => { + // Report the type of the offending content value, not of the enclosing record + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: content.get_type().to_string(), + span: content.span()?, + help: Some("PI content expected to be a string".into()), + }); } - } - _ => { - let s = current.into_abbreviated_string(config); + }; + + to_processing_instruction(entry_span, tag, attrs, content, writer) + } else { + // Allow tag to have no attributes or content for short hand input + // alternatives like {tag: a attributes: {} content: []}, {tag: a attributes: null + // content: null}, {tag: a}. See to_xml_entry for more + let (attr_cols, attr_values) = match attrs { + Value::Record { cols, vals, .. } => (cols, vals), + Value::Nothing { .. } => (Vec::new(), Vec::new()), + _ => { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: attrs.get_type().to_string(), + span: attrs.span()?, + help: Some("Tag attributes expected to be a record".into()), + }); + } + }; + + let content = match content { + Value::List { vals, .. } => vals, + Value::Nothing { .. 
} => Vec::new(), + _ => { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: content.get_type().to_string(), + span: content.span()?, + help: Some("Tag content expected to be a list".into()), + }); + } + }; + + to_tag( + entry_span, + tag, + tag_span, + attr_cols, + attr_values, + content, + writer, + ) + } +} + +fn to_comment( + entry_span: Span, + attrs: Value, + content: Value, + writer: &mut quick_xml::Writer, +) -> Result<(), ShellError> { + match (attrs, content) { + (Value::Nothing { .. }, Value::String { val, .. }) => { + let comment_content = BytesText::new(val.as_str()); writer - .write_event(Event::Text(BytesText::from_escaped(s.as_str()))) - .expect("Couldn't write XML text"); + .write_event(Event::Comment(comment_content)) + .map_err(|_| ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: Type::Record(vec![]).to_string(), + span: entry_span, + help: Some("Failure writing comment to xml".into()), + }) + } + (_, content) => Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: content.get_type().to_string(), + span: entry_span, + help: Some("Comment expected to have string content and no attributes".into()), + }), + } +} + +fn to_processing_instruction( + entry_span: Span, + tag: &str, + attrs: Value, + content: String, + writer: &mut quick_xml::Writer, +) -> Result<(), ShellError> { + if !matches!(attrs, Value::Nothing { .. 
}) { + return Err(ShellError::CantConvert { + to_type: "XML".into(), + from_type: Type::Record(vec![]).to_string(), + span: entry_span, + help: Some("PIs do not have attributes".into()), + }); + } + + let content_text = format!("{} {}", tag, content); + let pi_content = BytesText::new(content_text.as_str()); + writer + .write_event(Event::PI(pi_content)) + .map_err(|_| ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: Type::Record(vec![]).to_string(), + span: entry_span, + help: Some("Failure writing PI to xml".into()), + }) +} + +fn to_tag( + entry_span: Span, + tag: String, + tag_span: Span, + attr_cols: Vec, + attr_vals: Vec, + children: Vec, + writer: &mut quick_xml::Writer, +) -> Result<(), ShellError> { + if tag.starts_with('!') || tag.starts_with('?') { + return Err(ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: Type::Record(vec![]).to_string(), + span: tag_span, + help: Some(format!( + "Incorrect tag name {}, tag name can not start with ! 
or ?", + tag + )), + }); + } + + let attributes = parse_attributes(attr_cols, attr_vals)?; + let mut open_tag_event = BytesStart::new(tag.clone()); + add_attributes(&mut open_tag_event, &attributes); + + writer + .write_event(Event::Start(open_tag_event)) + .map_err(|_| ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: Type::Record(vec![]).to_string(), + span: entry_span, + help: Some("Failure writing tag to xml".into()), + })?; + + children + .into_iter() + .try_for_each(|child| to_xml_entry(child, false, writer))?; + + let close_tag_event = BytesEnd::new(tag); + writer + .write_event(Event::End(close_tag_event)) + .map_err(|_| ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: Type::Record(vec![]).to_string(), + span: entry_span, + help: Some("Failure writing tag to xml".into()), + }) +} + +fn parse_attributes( + cols: Vec, + vals: Vec, +) -> Result, ShellError> { + let mut h = IndexMap::new(); + for (k, v) in cols.into_iter().zip(vals.into_iter()) { + if let Value::String { val, .. 
} = v { + h.insert(k, val); + } else { + return Err(ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: v.get_type().to_string(), + span: v.span()?, + help: Some("Attribute value expected to be a string".into()), + }); } } - Ok(()) + Ok(h) +} + +fn to_xml_text( + val: &str, + span: Span, + writer: &mut quick_xml::Writer, +) -> Result<(), ShellError> { + let text = Event::Text(BytesText::new(val)); + writer + .write_event(text) + .map_err(|_| ShellError::CantConvert { + to_type: "XML".to_string(), + from_type: Type::String.to_string(), + span, + help: Some("Failure writing string to xml".into()), + }) } fn to_xml( input: PipelineData, head: Span, pretty: Option>, - config: &Config, ) -> Result { let mut w = pretty.as_ref().map_or_else( || quick_xml::Writer::new(Cursor::new(Vec::new())), @@ -173,25 +397,16 @@ fn to_xml( ); let value = input.into_value(head); - let value_type = value.get_type(); - match write_xml_events(value, &mut w, config) { - Ok(_) => { - let b = w.into_inner().into_inner(); - let s = if let Ok(s) = String::from_utf8(b) { - s - } else { - return Err(ShellError::NonUtf8(head)); - }; - Ok(Value::string(s, head).into_pipeline_data()) - } - Err(_) => Err(ShellError::CantConvert { - to_type: "XML".into(), - from_type: value_type.to_string(), - span: head, - help: None, - }), - } + to_xml_entry(value, true, &mut w).and_then(|_| { + let b = w.into_inner().into_inner(); + let s = if let Ok(s) = String::from_utf8(b) { + s + } else { + return Err(ShellError::NonUtf8(head)); + }; + Ok(Value::string(s, head).into_pipeline_data()) + }) } #[cfg(test)] diff --git a/crates/nu-command/tests/commands/open.rs b/crates/nu-command/tests/commands/open.rs index f7337e8bde..0e692e859e 100644 --- a/crates/nu-command/tests/commands/open.rs +++ b/crates/nu-command/tests/commands/open.rs @@ -179,7 +179,18 @@ fn parses_json() { fn parses_xml() { let actual = nu!( cwd: "tests/fixtures/formats", - "open jonathan.xml | get rss.children.channel.children | get 
0.3.item.children | get 3.link.children.0" + pipeline(r#" + open jonathan.xml + | get content + | where tag == channel + | get content + | flatten + | where tag == item + | get content + | flatten + | where tag == guid + | get content.0.content.0 + "#) ); assert_eq!( diff --git a/crates/nu-command/tests/format_conversions/xml.rs b/crates/nu-command/tests/format_conversions/xml.rs index 15a62d6907..5f05816b44 100644 --- a/crates/nu-command/tests/format_conversions/xml.rs +++ b/crates/nu-command/tests/format_conversions/xml.rs @@ -8,7 +8,15 @@ fn table_to_xml_text_and_from_xml_text_back_into_table() { open jonathan.xml | to xml | from xml - | get rss.children.channel.children.0.3.item.children.guid.4.attributes.isPermaLink + | get content + | where tag == channel + | get content + | flatten + | where tag == item + | get content + | flatten + | where tag == guid + | get 0.attributes.isPermaLink "# ));